<!-- CINXE.COM -->
<!-- Search | arXiv e-print repository -->
<!DOCTYPE html> <html lang="en"> <head> <meta charset="utf-8"/> <meta name="viewport" content="width=device-width, initial-scale=1"/> <!-- new favicon config and versions by realfavicongenerator.net --> <link rel="apple-touch-icon" sizes="180x180" href="https://static.arxiv.org/static/base/1.0.0a5/images/icons/apple-touch-icon.png"> <link rel="icon" type="image/png" sizes="32x32" href="https://static.arxiv.org/static/base/1.0.0a5/images/icons/favicon-32x32.png"> <link rel="icon" type="image/png" sizes="16x16" href="https://static.arxiv.org/static/base/1.0.0a5/images/icons/favicon-16x16.png"> <link rel="manifest" href="https://static.arxiv.org/static/base/1.0.0a5/images/icons/site.webmanifest"> <link rel="mask-icon" href="https://static.arxiv.org/static/base/1.0.0a5/images/icons/safari-pinned-tab.svg" color="#b31b1b"> <link rel="shortcut icon" href="https://static.arxiv.org/static/base/1.0.0a5/images/icons/favicon.ico"> <meta name="msapplication-TileColor" content="#b31b1b"> <meta name="msapplication-config" content="images/icons/browserconfig.xml"> <meta name="theme-color" content="#b31b1b"> <!-- end favicon config --> <title>Search | arXiv e-print repository</title> <script defer src="https://static.arxiv.org/static/base/1.0.0a5/fontawesome-free-5.11.2-web/js/all.js"></script> <link rel="stylesheet" href="https://static.arxiv.org/static/base/1.0.0a5/css/arxivstyle.css" /> <script type="text/x-mathjax-config"> MathJax.Hub.Config({ messageStyle: "none", extensions: ["tex2jax.js"], jax: ["input/TeX", "output/HTML-CSS"], tex2jax: { inlineMath: [ ['$','$'], ["\\(","\\)"] ], displayMath: [ ['$$','$$'], ["\\[","\\]"] ], processEscapes: true, ignoreClass: '.*', processClass: 'mathjax.*' }, TeX: { extensions: ["AMSmath.js", "AMSsymbols.js", "noErrors.js"], noErrors: { inlineDelimiters: ["$","$"], multiLine: false, style: { "font-size": "normal", "border": "" } } }, "HTML-CSS": { availableFonts: ["TeX"] } }); </script> <script 
src='https://static.arxiv.org/MathJax-2.7.3/MathJax.js'></script> <script src="https://static.arxiv.org/static/base/1.0.0a5/js/notification.js"></script> <link rel="stylesheet" href="https://static.arxiv.org/static/search/0.5.6/css/bulma-tooltip.min.css" /> <link rel="stylesheet" href="https://static.arxiv.org/static/search/0.5.6/css/search.css" /> <script src="https://code.jquery.com/jquery-3.2.1.slim.min.js" integrity="sha256-k2WSCIexGzOj3Euiig+TlR8gA0EmPjuc79OEeY5L45g=" crossorigin="anonymous"></script> <script src="https://static.arxiv.org/static/search/0.5.6/js/fieldset.js"></script> <style> /* "radio" is not an HTML element; target the input element so the rule actually hides the field */ input#cf-customfield_11400 { display: none; } </style> </head> <body> <header><a href="#main-container" class="is-sr-only">Skip to main content</a> <!-- contains Cornell logo and sponsor statement --> <div class="attribution level is-marginless" role="banner"> <div class="level-left"> <a class="level-item" href="https://cornell.edu/"><img src="https://static.arxiv.org/static/base/1.0.0a5/images/cornell-reduced-white-SMALL.svg" alt="Cornell University" width="200" aria-label="logo" /></a> </div> <div class="level-right is-marginless"><p class="sponsors level-item is-marginless"><span id="support-ack-url">We gratefully acknowledge support from<br /> the Simons Foundation, <a href="https://info.arxiv.org/about/ourmembers.html">member institutions</a>, and all contributors. 
<a href="https://info.arxiv.org/about/donate.html">Donate</a></span></p></div> </div> <!-- contains arXiv identity and search bar --> <div class="identity level is-marginless"> <div class="level-left"> <div class="level-item"> <a class="arxiv" href="https://arxiv.org/" aria-label="arxiv-logo"> <img src="https://static.arxiv.org/static/base/1.0.0a5/images/arxiv-logo-one-color-white.svg" aria-label="logo" alt="arxiv logo" width="85" style="width:85px;"/> </a> </div> </div> <div class="search-block level-right"> <form class="level-item mini-search" method="GET" action="https://arxiv.org/search"> <div class="field has-addons"> <div class="control"> <input class="input is-small" type="text" name="query" placeholder="Search..." aria-label="Search term or terms" /> <p class="help"><a href="https://info.arxiv.org/help">Help</a> | <a href="https://arxiv.org/search/advanced">Advanced Search</a></p> </div> <div class="control"> <div class="select is-small"> <select name="searchtype" aria-label="Field to search"> <option value="all" selected="selected">All fields</option> <option value="title">Title</option> <option value="author">Author</option> <option value="abstract">Abstract</option> <option value="comments">Comments</option> <option value="journal_ref">Journal reference</option> <option value="acm_class">ACM classification</option> <option value="msc_class">MSC classification</option> <option value="report_num">Report number</option> <option value="paper_id">arXiv identifier</option> <option value="doi">DOI</option> <option value="orcid">ORCID</option> <option value="author_id">arXiv author ID</option> <option value="help">Help pages</option> <option value="full_text">Full text</option> </select> </div> </div> <input type="hidden" name="source" value="header"> <button class="button is-small is-cul-darker">Search</button> </div> </form> </div> </div> <!-- closes identity --> <div class="container"> <div class="user-tools is-size-7 has-text-right has-text-weight-bold" 
role="navigation" aria-label="User menu"> <a href="https://arxiv.org/login">Login</a> </div> </div> </header> <main class="container" id="main-container"> <div class="level is-marginless"> <div class="level-left"> <h1 class="title is-clearfix"> Showing 1&ndash;50 of 143 results for author: <span class="mathjax">Clark, J</span> </h1> </div> <div class="level-right is-hidden-mobile"> <!-- feedback for mobile is moved to footer --> <span class="help" style="display: inline-block;"><a href="https://github.com/arXiv/arxiv-search/releases">Search v0.5.6 released 2020-02-24</a> </span> </div> </div> <div class="content"> <!-- "aria-role" is not a valid attribute; the ARIA landmark is declared with role="search" --> <form method="GET" action="/search/cs" role="search"> Searching in archive <strong>cs</strong>. <a href="/search/?searchtype=author&query=Clark%2C+J">Search in all archives.</a> <div class="field has-addons-tablet"> <div class="control is-expanded"> <label for="query" class="hidden-label">Search term or terms</label> <input class="input is-medium" id="query" name="query" placeholder="Search term..." 
type="text" value="Clark, J"> </div> <div class="select control is-medium"> <label class="is-hidden" for="searchtype">Field</label> <select class="is-medium" id="searchtype" name="searchtype"><option value="all">All fields</option><option value="title">Title</option><option selected value="author">Author(s)</option><option value="abstract">Abstract</option><option value="comments">Comments</option><option value="journal_ref">Journal reference</option><option value="acm_class">ACM classification</option><option value="msc_class">MSC classification</option><option value="report_num">Report number</option><option value="paper_id">arXiv identifier</option><option value="doi">DOI</option><option value="orcid">ORCID</option><option value="license">License (URI)</option><option value="author_id">arXiv author ID</option><option value="help">Help pages</option><option value="full_text">Full text</option></select> </div> <div class="control"> <button class="button is-link is-medium">Search</button> </div> </div> <div class="field"> <div class="control is-size-7"> <label class="radio"> <input checked id="abstracts-0" name="abstracts" type="radio" value="show"> Show abstracts </label> <label class="radio"> <input id="abstracts-1" name="abstracts" type="radio" value="hide"> Hide abstracts </label> </div> </div> <div class="is-clearfix" style="height: 2.5em"> <div class="is-pulled-right"> <a href="/search/advanced?terms-0-term=Clark%2C+J&terms-0-field=author&size=50&order=-announced_date_first">Advanced Search</a> </div> </div> <input type="hidden" name="order" value="-announced_date_first"> <input type="hidden" name="size" value="50"> </form> <div class="level breathe-horizontal"> <div class="level-left"> <form method="GET" action="/search/"> <div style="display: none;"> <select id="searchtype" name="searchtype"><option value="all">All fields</option><option value="title">Title</option><option selected value="author">Author(s)</option><option 
value="abstract">Abstract</option><option value="comments">Comments</option><option value="journal_ref">Journal reference</option><option value="acm_class">ACM classification</option><option value="msc_class">MSC classification</option><option value="report_num">Report number</option><option value="paper_id">arXiv identifier</option><option value="doi">DOI</option><option value="orcid">ORCID</option><option value="license">License (URI)</option><option value="author_id">arXiv author ID</option><option value="help">Help pages</option><option value="full_text">Full text</option></select> <input id="query" name="query" type="text" value="Clark, J"> <ul id="abstracts"><li><input checked id="abstracts-0" name="abstracts" type="radio" value="show"> <label for="abstracts-0">Show abstracts</label></li><li><input id="abstracts-1" name="abstracts" type="radio" value="hide"> <label for="abstracts-1">Hide abstracts</label></li></ul> </div> <div class="box field is-grouped is-grouped-multiline level-item"> <div class="control"> <span class="select is-small"> <select id="size" name="size"><option value="25">25</option><option selected value="50">50</option><option value="100">100</option><option value="200">200</option></select> </span> <label for="size">results per page</label>. 
</div> <div class="control"> <label for="order">Sort results by</label> <span class="select is-small"> <select id="order" name="order"><option selected value="-announced_date_first">Announcement date (newest first)</option><option value="announced_date_first">Announcement date (oldest first)</option><option value="-submitted_date">Submission date (newest first)</option><option value="submitted_date">Submission date (oldest first)</option><option value="">Relevance</option></select> </span> </div> <div class="control"> <button class="button is-small is-link">Go</button> </div> </div> </form> </div> </div> <nav class="pagination is-small is-centered breathe-horizontal" role="navigation" aria-label="pagination"> <a href="" class="pagination-previous is-invisible">Previous </a> <a href="/search/?searchtype=author&query=Clark%2C+J&start=50" class="pagination-next" >Next </a> <ul class="pagination-list"> <!-- aria-current="page" belongs on the current page (page 1, marked is-current), not on the other pages --> <li> <a href="/search/?searchtype=author&query=Clark%2C+J&start=0" class="pagination-link is-current" aria-label="Goto page 1" aria-current="page">1 </a> </li> <li> <a href="/search/?searchtype=author&query=Clark%2C+J&start=50" class="pagination-link " aria-label="Page 2">2 </a> </li> <li> <a href="/search/?searchtype=author&query=Clark%2C+J&start=100" class="pagination-link " aria-label="Page 3">3 </a> </li> </ul> </nav> <ol class="breathe-horizontal" start="1"> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2411.12773">arXiv:2411.12773</a> <span> [<a href="https://arxiv.org/pdf/2411.12773">pdf</a>, <a href="https://arxiv.org/format/2411.12773">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> Decoupling Training-Free Guided Diffusion by ADMM </p> <p class="authors"> <span 
class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Zhang%2C+Y">Youyuan Zhang</a>, <a href="/search/cs?searchtype=author&query=Liu%2C+Z">Zehua Liu</a>, <a href="/search/cs?searchtype=author&query=Li%2C+Z">Zenan Li</a>, <a href="/search/cs?searchtype=author&query=Li%2C+Z">Zhaoyu Li</a>, <a href="/search/cs?searchtype=author&query=Clark%2C+J+J">James J. Clark</a>, <a href="/search/cs?searchtype=author&query=Si%2C+X">Xujie Si</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2411.12773v1-abstract-short" style="display: inline;"> In this paper, we consider the conditional generation problem by guiding off-the-shelf unconditional diffusion models with differentiable loss functions in a plug-and-play fashion. While previous research has primarily focused on balancing the unconditional diffusion model and the guided loss through a tuned weight hyperparameter, we propose a novel framework that distinctly decouples these two co… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.12773v1-abstract-full').style.display = 'inline'; document.getElementById('2411.12773v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2411.12773v1-abstract-full" style="display: none;"> In this paper, we consider the conditional generation problem by guiding off-the-shelf unconditional diffusion models with differentiable loss functions in a plug-and-play fashion. While previous research has primarily focused on balancing the unconditional diffusion model and the guided loss through a tuned weight hyperparameter, we propose a novel framework that distinctly decouples these two components. 
Specifically, we introduce two variables ${x}$ and ${z}$, to represent the generated samples governed by the unconditional generation model and the guidance function, respectively. This decoupling reformulates conditional generation into two manageable subproblems, unified by the constraint ${x} = {z}$. Leveraging this setup, we develop a new algorithm based on the Alternating Direction Method of Multipliers (ADMM) to adaptively balance these components. Additionally, we establish the equivalence between the diffusion reverse step and the proximal operator of ADMM and provide a detailed convergence analysis of our algorithm under certain mild assumptions. Our experiments demonstrate that our proposed method ADMMDiff consistently generates high-quality samples while ensuring strong adherence to the conditioning criteria. It outperforms existing methods across a range of conditional generation tasks, including image generation with various guidance and controllable motion synthesis. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.12773v1-abstract-full').style.display = 'none'; document.getElementById('2411.12773v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 18 November, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> November 2024. 
</p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2411.11774">arXiv:2411.11774</a> <span> [<a href="https://arxiv.org/pdf/2411.11774">pdf</a>, <a href="https://arxiv.org/ps/2411.11774">ps</a>, <a href="https://arxiv.org/format/2411.11774">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Human-Computer Interaction">cs.HC</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> </div> </div> <p class="title is-5 mathjax"> Exploring the Requirements of Clinicians for Explainable AI Decision Support Systems in Intensive Care </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Clark%2C+J+N">Jeffrey N. Clark</a>, <a href="/search/cs?searchtype=author&query=Wragg%2C+M">Matthew Wragg</a>, <a href="/search/cs?searchtype=author&query=Nielsen%2C+E">Emily Nielsen</a>, <a href="/search/cs?searchtype=author&query=Perello-Nieto%2C+M">Miquel Perello-Nieto</a>, <a href="/search/cs?searchtype=author&query=Keshtmand%2C+N">Nawid Keshtmand</a>, <a href="/search/cs?searchtype=author&query=Ambler%2C+M">Michael Ambler</a>, <a href="/search/cs?searchtype=author&query=Sharma%2C+S">Shiv Sharma</a>, <a href="/search/cs?searchtype=author&query=Bourdeaux%2C+C+P">Christopher P. 
Bourdeaux</a>, <a href="/search/cs?searchtype=author&query=Brigden%2C+A">Amberly Brigden</a>, <a href="/search/cs?searchtype=author&query=Santos-Rodriguez%2C+R">Raul Santos-Rodriguez</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2411.11774v1-abstract-short" style="display: inline;"> There is a growing need to understand how digital systems can support clinical decision-making, particularly as artificial intelligence (AI) models become increasingly complex and less human-interpretable. This complexity raises concerns about trustworthiness, impacting safe and effective adoption of such technologies. Improved understanding of decision-making processes and requirements for explan… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.11774v1-abstract-full').style.display = 'inline'; document.getElementById('2411.11774v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2411.11774v1-abstract-full" style="display: none;"> There is a growing need to understand how digital systems can support clinical decision-making, particularly as artificial intelligence (AI) models become increasingly complex and less human-interpretable. This complexity raises concerns about trustworthiness, impacting safe and effective adoption of such technologies. Improved understanding of decision-making processes and requirements for explanations coming from decision support tools is a vital component in providing effective explainable solutions. This is particularly relevant in the data-intensive, fast-paced environments of intensive care units (ICUs). To explore these issues, group interviews were conducted with seven ICU clinicians, representing various roles and experience levels. 
Thematic analysis revealed three core themes: (T1) ICU decision-making relies on a wide range of factors, (T2) the complexity of patient state is challenging for shared decision-making, and (T3) requirements and capabilities of AI decision support systems. We include design recommendations from clinical input, providing insights to inform future AI systems for intensive care. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.11774v1-abstract-full').style.display = 'none'; document.getElementById('2411.11774v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 18 November, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> November 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2410.17088">arXiv:2410.17088</a> <span> [<a href="https://arxiv.org/pdf/2410.17088">pdf</a>, <a href="https://arxiv.org/format/2410.17088">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computers and Society">cs.CY</span> </div> </div> <p class="title is-5 mathjax"> Science Out of Its Ivory Tower: Improving Accessibility with Reinforcement Learning </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Wang%2C+H">Haining Wang</a>, <a href="/search/cs?searchtype=author&query=Clark%2C+J">Jason Clark</a>, <a href="/search/cs?searchtype=author&query=McKelvey%2C+H">Hannah McKelvey</a>, <a href="/search/cs?searchtype=author&query=Sterman%2C+L">Leila Sterman</a>, <a 
href="/search/cs?searchtype=author&query=Gao%2C+Z">Zheng Gao</a>, <a href="/search/cs?searchtype=author&query=Tian%2C+Z">Zuoyu Tian</a>, <a href="/search/cs?searchtype=author&query=K%C3%BCbler%2C+S">Sandra Kübler</a>, <a href="/search/cs?searchtype=author&query=Liu%2C+X">Xiaozhong Liu</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2410.17088v1-abstract-short" style="display: inline;"> A vast amount of scholarly work is published daily, yet much of it remains inaccessible to the general public due to dense jargon and complex language. To address this challenge in science communication, we introduce a reinforcement learning framework that fine-tunes a language model to rewrite scholarly abstracts into more comprehensible versions. Guided by a carefully balanced combination of wor… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.17088v1-abstract-full').style.display = 'inline'; document.getElementById('2410.17088v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2410.17088v1-abstract-full" style="display: none;"> A vast amount of scholarly work is published daily, yet much of it remains inaccessible to the general public due to dense jargon and complex language. To address this challenge in science communication, we introduce a reinforcement learning framework that fine-tunes a language model to rewrite scholarly abstracts into more comprehensible versions. Guided by a carefully balanced combination of word- and sentence-level accessibility rewards, our language model effectively substitutes technical terms with more accessible alternatives, a task which models supervised fine-tuned or guided by conventional readability measures struggle to accomplish. 
Our best model adjusts the readability level of scholarly abstracts by approximately six U.S. grade levels -- in other words, from a postgraduate to a high school level. This translates to roughly a 90% relative boost over the supervised fine-tuning baseline, all while maintaining factual accuracy and high-quality language. An in-depth analysis of our approach shows that balanced rewards lead to systematic modifications in the base model, likely contributing to smoother optimization and superior performance. We envision this work as a step toward bridging the gap between scholarly research and the general public, particularly younger readers and those without a college degree. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.17088v1-abstract-full').style.display = 'none'; document.getElementById('2410.17088v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 22 October, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2024. 
</p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2410.02901">arXiv:2410.02901</a> <span> [<a href="https://arxiv.org/pdf/2410.02901">pdf</a>, <a href="https://arxiv.org/format/2410.02901">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Quantum Physics">quant-ph</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Emerging Technologies">cs.ET</span> </div> <div class="is-inline-block" style="margin-left: 0.5rem"> <div class="tags has-addons"> <span class="tag is-dark is-size-7">doi</span> <span class="tag is-light is-size-7"><a class="" href="https://doi.org/10.1109/QCE57702.2023.00089">10.1109/QCE57702.2023.00089 <i class="fa fa-external-link" aria-hidden="true"></i></a></span> </div> </div> </div> <p class="title is-5 mathjax"> GTQCP: Greedy Topology-Aware Quantum Circuit Partitioning </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Clark%2C+J">Joseph Clark</a>, <a href="/search/cs?searchtype=author&query=Humble%2C+T+S">Travis S. Humble</a>, <a href="/search/cs?searchtype=author&query=Thapliyal%2C+H">Himanshu Thapliyal</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2410.02901v1-abstract-short" style="display: inline;"> We propose Greedy Topology-Aware Quantum Circuit Partitioning (GTQCP), a novel quantum gate circuit partitioning method which partitions circuits by applying a greedy heuristic to the qubit dependency graph of the circuit. GTQCP is compared against three other gate partitioning methods, two of which (QuickPartitioner and ScanPartitioner) are part of the Berkley Quantum Synthesis Toolkit. 
GTQCP is… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.02901v1-abstract-full').style.display = 'inline'; document.getElementById('2410.02901v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2410.02901v1-abstract-full" style="display: none;"> We propose Greedy Topology-Aware Quantum Circuit Partitioning (GTQCP), a novel quantum gate circuit partitioning method which partitions circuits by applying a greedy heuristic to the qubit dependency graph of the circuit. GTQCP is compared against three other gate partitioning methods, two of which (QuickPartitioner and ScanPartitioner) are part of the Berkley Quantum Synthesis Toolkit. GTQCP is shown to have 18% run time improvement ratio over the fastest approach (QuickPartitioner), and a 96% improvement over the highest quality approach (ScanPartitioner). The algorithm also demonstrates nearly identical result quality (number of partitions) compared with ScanPartitioner, and a 38% quality improvement over QuickPartitioner. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.02901v1-abstract-full').style.display = 'none'; document.getElementById('2410.02901v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 3 October, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">6 pages, 4 figures, 3 tables</span> </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Journal ref:</span> 2023 IEEE International Conference on Quantum Computing and Engineering (QCE), 2023, pp. 
739-744 </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2409.11629">arXiv:2409.11629</a> <span> [<a href="https://arxiv.org/pdf/2409.11629">pdf</a>, <a href="https://arxiv.org/format/2409.11629">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Information Retrieval">cs.IR</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Human-Computer Interaction">cs.HC</span> </div> </div> <p class="title is-5 mathjax"> Designing Interfaces for Multimodal Vector Search Applications </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Elliott%2C+O+P">Owen Pendrigh Elliott</a>, <a href="/search/cs?searchtype=author&query=Hamer%2C+T">Tom Hamer</a>, <a href="/search/cs?searchtype=author&query=Clark%2C+J">Jesse Clark</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2409.11629v1-abstract-short" style="display: inline;"> Multimodal vector search offers a new paradigm for information retrieval by exposing numerous pieces of functionality which are not possible in traditional lexical search engines. While multimodal vector search can be treated as a drop in replacement for these traditional systems, the experience can be significantly enhanced by leveraging the unique capabilities of multimodal search. 
Central to an… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2409.11629v1-abstract-full').style.display = 'inline'; document.getElementById('2409.11629v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2409.11629v1-abstract-full" style="display: none;"> Multimodal vector search offers a new paradigm for information retrieval by exposing numerous pieces of functionality which are not possible in traditional lexical search engines. While multimodal vector search can be treated as a drop in replacement for these traditional systems, the experience can be significantly enhanced by leveraging the unique capabilities of multimodal search. Central to any information retrieval system is a user who expresses an information need, traditional user interfaces with a single search bar allow users to interact with lexical search systems effectively however are not necessarily optimal for multimodal vector search. In this paper we explore novel capabilities of multimodal vector search applications utilising CLIP models and present implementations and design patterns which better allow users to express their information needs and effectively interact with these systems in an information retrieval context. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2409.11629v1-abstract-full').style.display = 'none'; document.getElementById('2409.11629v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 17 September, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> September 2024. 
</p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">12 pages, 8 figures, CIKM 2024 MMSR Workshop</span> </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">ACM Class:</span> H.5.2; H.1.1; H.1.2; H.3.3 </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2409.09103">arXiv:2409.09103</a> <span> [<a href="https://arxiv.org/pdf/2409.09103">pdf</a>, <a href="https://arxiv.org/ps/2409.09103">ps</a>, <a href="https://arxiv.org/format/2409.09103">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Quantum Physics">quant-ph</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Emerging Technologies">cs.ET</span> </div> </div> <p class="title is-5 mathjax"> Improving the Reliability of Quantum Circuits by Evolving Heterogeneous Ensembles </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Parry%2C+O">Owain Parry</a>, <a href="/search/cs?searchtype=author&query=Clark%2C+J">John Clark</a>, <a href="/search/cs?searchtype=author&query=McMinn%2C+P">Phil McMinn</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2409.09103v1-abstract-short" style="display: inline;"> Quantum computers can perform certain operations exponentially faster than classical computers, but designing quantum circuits is challenging. To that end, researchers used evolutionary algorithms to produce probabilistic quantum circuits that give the correct output more often than not for any input. 
They can be executed multiple times, with the outputs combined using a classical method (such as… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2409.09103v1-abstract-full').style.display = 'inline'; document.getElementById('2409.09103v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2409.09103v1-abstract-full" style="display: none;"> Quantum computers can perform certain operations exponentially faster than classical computers, but designing quantum circuits is challenging. To that end, researchers used evolutionary algorithms to produce probabilistic quantum circuits that give the correct output more often than not for any input. They can be executed multiple times, with the outputs combined using a classical method (such as voting) to produce the final output, effectively creating a homogeneous ensemble of circuits (i.e., all identical). Inspired by n-version programming and ensemble learning, we developed a tool that uses an evolutionary algorithm to generate heterogeneous ensembles of circuits (i.e., all different), named QuEEn. We used it to evolve ensembles to solve the Iris classification problem. When using ideal simulation, we found the performance of heterogeneous ensembles to be greater than that of homogeneous ensembles to a statistically significant degree. When using noisy simulation, we still observed a statistically significant improvement in the majority of cases. Our results indicate that evolving heterogeneous ensembles is an effective strategy for improving the reliability of quantum circuits. This is particularly relevant in the current NISQ era of quantum computing where computers do not yet have good tolerance to quantum noise. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2409.09103v1-abstract-full').style.display = 'none'; document.getElementById('2409.09103v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 13 September, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> September 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2409.06020">arXiv:2409.06020</a> <span> [<a href="https://arxiv.org/pdf/2409.06020">pdf</a>, <a href="https://arxiv.org/format/2409.06020">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Quantum Physics">quant-ph</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Emerging Technologies">cs.ET</span> </div> <div class="is-inline-block" style="margin-left: 0.5rem"> <div class="tags has-addons"> <span class="tag is-dark is-size-7">doi</span> <span class="tag is-light is-size-7"><a class="" href="https://doi.org/10.1109/ISQED60706.2024.10528701">10.1109/ISQED60706.2024.10528701 <i class="fa fa-external-link" aria-hidden="true"></i></a></span> </div> </div> </div> <p class="title is-5 mathjax"> Peephole Optimization for Quantum Approximate Synthesis </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Clark%2C+J">Joseph Clark</a>, <a href="/search/cs?searchtype=author&query=Thapliyal%2C+H">Himanshu Thapliyal</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2409.06020v1-abstract-short" style="display: inline;"> Peephole optimization of quantum circuits provides a method of leveraging standard circuit 
synthesis approaches into scalable quantum circuit optimization. One application of this technique partitions an entire circuit into a series of peepholes and produces multiple approximations of each partitioned subcircuit. A single approximation of each subcircuit is then selected to form optimized result c… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2409.06020v1-abstract-full').style.display = 'inline'; document.getElementById('2409.06020v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2409.06020v1-abstract-full" style="display: none;"> Peephole optimization of quantum circuits provides a method of leveraging standard circuit synthesis approaches into scalable quantum circuit optimization. One application of this technique partitions an entire circuit into a series of peepholes and produces multiple approximations of each partitioned subcircuit. A single approximation of each subcircuit is then selected to form optimized result circuits. We propose a series of improvements to the final phase of this architecture, which include the addition of error awareness and a better method of approximating the correctness of the result. We evaluated these proposed improvements on a set of benchmark circuits using the IBMQ FakeWashington simulator. The results demonstrate that our best-performing method provides an average reduction in Total Variational Distance (TVD) and Jensen-Shannon Divergence (JSD) of 18.2% and 15.8%, respectively, compared with the Qiskit optimizer. This also constitutes an improvement in TVD of 11.4% and JSD of 9.0% over existing solutions. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2409.06020v1-abstract-full').style.display = 'none'; document.getElementById('2409.06020v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 9 September, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> September 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">8 pages, 4 figures, 1 table</span> </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Journal ref:</span> 2024 25th International Symposium on Quality Electronic Design (ISQED), 2024, pp. 1-8 </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2408.03899">arXiv:2408.03899</a> <span> [<a href="https://arxiv.org/pdf/2408.03899">pdf</a>, <a href="https://arxiv.org/format/2408.03899">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computers and Society">cs.CY</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Digital Libraries">cs.DL</span> </div> </div> <p class="title is-5 mathjax"> Simplifying Scholarly Abstracts for Accessible Digital Libraries </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Wang%2C+H">Haining Wang</a>, <a href="/search/cs?searchtype=author&query=Clark%2C+J">Jason Clark</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis 
has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2408.03899v1-abstract-short" style="display: inline;"> Standing at the forefront of knowledge dissemination, digital libraries curate vast collections of scientific literature. However, these scholarly writings are often laden with jargon and tailored for domain experts rather than the general public. As librarians, we strive to offer services to a diverse audience, including those with lower reading levels. To extend our services beyond mere access,… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2408.03899v1-abstract-full').style.display = 'inline'; document.getElementById('2408.03899v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2408.03899v1-abstract-full" style="display: none;"> Standing at the forefront of knowledge dissemination, digital libraries curate vast collections of scientific literature. However, these scholarly writings are often laden with jargon and tailored for domain experts rather than the general public. As librarians, we strive to offer services to a diverse audience, including those with lower reading levels. To extend our services beyond mere access, we propose fine-tuning a language model to rewrite scholarly abstracts into more comprehensible versions, thereby making scholarly literature more accessible when requested. We began by introducing a corpus specifically designed for training models to simplify scholarly abstracts. This corpus consists of over three thousand pairs of abstracts and significance statements from diverse disciplines. We then fine-tuned four language models using this corpus. The outputs from the models were subsequently examined both quantitatively for accessibility and semantic coherence, and qualitatively for language quality, faithfulness, and completeness. 
Our findings show that the resulting models can improve readability by over three grade levels, while maintaining fidelity to the original content. Although commercial state-of-the-art models still hold an edge, our models are much more compact, can be deployed locally in an affordable manner, and alleviate the privacy concerns associated with using commercial models. We envision this work as a step toward more inclusive and accessible libraries, improving our services for young readers and those without a college degree. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2408.03899v1-abstract-full').style.display = 'none'; document.getElementById('2408.03899v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 7 August, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> August 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Initial submission to JCDL2024</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2407.09373">arXiv:2407.09373</a> <span> [<a href="https://arxiv.org/pdf/2407.09373">pdf</a>, <a href="https://arxiv.org/format/2407.09373">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> Towards Personalised Patient Risk Prediction Using Temporal Hospital Data Trajectories </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Barnes%2C+T">Thea Barnes</a>, <a 
href="/search/cs?searchtype=author&query=Werner%2C+E">Enrico Werner</a>, <a href="/search/cs?searchtype=author&query=Clark%2C+J+N">Jeffrey N. Clark</a>, <a href="/search/cs?searchtype=author&query=Santos-Rodriguez%2C+R">Raul Santos-Rodriguez</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2407.09373v1-abstract-short" style="display: inline;"> Quantifying a patient's health status provides clinicians with insight into patient risk, and the ability to better triage and manage resources. Early Warning Scores (EWS) are widely deployed to measure overall health status, and risk of adverse outcomes, in hospital patients. However, current EWS are limited both by their lack of personalisation and use of static observations. We propose a pipeli… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2407.09373v1-abstract-full').style.display = 'inline'; document.getElementById('2407.09373v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2407.09373v1-abstract-full" style="display: none;"> Quantifying a patient's health status provides clinicians with insight into patient risk, and the ability to better triage and manage resources. Early Warning Scores (EWS) are widely deployed to measure overall health status, and risk of adverse outcomes, in hospital patients. However, current EWS are limited both by their lack of personalisation and use of static observations. We propose a pipeline that groups intensive care unit patients by the trajectories of observations data throughout their stay as a basis for the development of personalised risk predictions. Feature importance is considered to provide model explainability. 
Using the MIMIC-IV dataset, six clusters were identified, capturing differences in disease codes, observations, lengths of admissions and outcomes. Applying the pipeline to data from just the first four hours of each ICU stay assigns the majority of patients to the same cluster as when the entire stay duration is considered. In-hospital mortality prediction models trained on individual clusters had higher F1 score performance in five of the six clusters when compared against the unclustered patient cohort. The pipeline could form the basis of a clinical decision support tool, working to improve the clinical characterisation of risk groups and the early detection of patient deterioration. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2407.09373v1-abstract-full').style.display = 'none'; document.getElementById('2407.09373v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 12 July, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> July 2024. 
</p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2407.08887">arXiv:2407.08887</a> <span> [<a href="https://arxiv.org/pdf/2407.08887">pdf</a>, <a href="https://arxiv.org/format/2407.08887">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> Automatic Pruning of Fine-tuning Datasets for Transformer-based Language Models </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Tayaranian%2C+M">Mohammadreza Tayaranian</a>, <a href="/search/cs?searchtype=author&query=Mozafari%2C+S+H">Seyyed Hasan Mozafari</a>, <a href="/search/cs?searchtype=author&query=Meyer%2C+B+H">Brett H. Meyer</a>, <a href="/search/cs?searchtype=author&query=Clark%2C+J+J">James J. Clark</a>, <a href="/search/cs?searchtype=author&query=Gross%2C+W+J">Warren J. Gross</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2407.08887v1-abstract-short" style="display: inline;"> Transformer-based language models have shown state-of-the-art performance on a variety of natural language understanding tasks. To achieve this performance, these models are first pre-trained on general corpus and then fine-tuned on downstream tasks. Previous work studied the effect of pruning the training set of the downstream tasks on the performance of the model on its evaluation set. 
In this w… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2407.08887v1-abstract-full').style.display = 'inline'; document.getElementById('2407.08887v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2407.08887v1-abstract-full" style="display: none;"> Transformer-based language models have shown state-of-the-art performance on a variety of natural language understanding tasks. To achieve this performance, these models are first pre-trained on general corpus and then fine-tuned on downstream tasks. Previous work studied the effect of pruning the training set of the downstream tasks on the performance of the model on its evaluation set. In this work, we propose an automatic dataset pruning method for the training set of fine-tuning tasks. Our method is based on the model's success rate in correctly classifying each training data point. Unlike previous work which relies on user feedback to determine subset size, our method automatically extracts training subsets that are adapted for each pair of model and fine-tuning task. Our method provides multiple subsets for use in dataset pruning that navigate the trade-off between subset size and evaluation accuracy. Our largest subset, which we also refer to as the winning ticket subset, is on average $3 \times$ smaller than the original training set of the fine-tuning task. Our experiments on 5 downstream tasks and 2 language models show that, on average, fine-tuning on the winning ticket subsets results in a $0.1 \%$ increase in the evaluation performance of the model. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2407.08887v1-abstract-full').style.display = 'none'; document.getElementById('2407.08887v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 11 July, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> July 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">28 pages, 17 figures. Accepted at the Third Conference on Lifelong Learning Agents (CoLLAs 2024)</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2405.19522">arXiv:2405.19522</a> <span> [<a href="https://arxiv.org/pdf/2405.19522">pdf</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> </div> </div> <p class="title is-5 mathjax"> Artificial Intelligence Index Report 2024 </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Maslej%2C+N">Nestor Maslej</a>, <a href="/search/cs?searchtype=author&query=Fattorini%2C+L">Loredana Fattorini</a>, <a href="/search/cs?searchtype=author&query=Perrault%2C+R">Raymond Perrault</a>, <a href="/search/cs?searchtype=author&query=Parli%2C+V">Vanessa Parli</a>, <a href="/search/cs?searchtype=author&query=Reuel%2C+A">Anka Reuel</a>, <a href="/search/cs?searchtype=author&query=Brynjolfsson%2C+E">Erik Brynjolfsson</a>, <a href="/search/cs?searchtype=author&query=Etchemendy%2C+J">John Etchemendy</a>, <a href="/search/cs?searchtype=author&query=Ligett%2C+K">Katrina Ligett</a>, <a href="/search/cs?searchtype=author&query=Lyons%2C+T">Terah Lyons</a>, <a 
href="/search/cs?searchtype=author&query=Manyika%2C+J">James Manyika</a>, <a href="/search/cs?searchtype=author&query=Niebles%2C+J+C">Juan Carlos Niebles</a>, <a href="/search/cs?searchtype=author&query=Shoham%2C+Y">Yoav Shoham</a>, <a href="/search/cs?searchtype=author&query=Wald%2C+R">Russell Wald</a>, <a href="/search/cs?searchtype=author&query=Clark%2C+J">Jack Clark</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2405.19522v1-abstract-short" style="display: inline;"> The 2024 Index is our most comprehensive to date and arrives at an important moment when AI's influence on society has never been more pronounced. This year, we have broadened our scope to more extensively cover essential trends such as technical advancements in AI, public perceptions of the technology, and the geopolitical dynamics surrounding its development. Featuring more original data than ev… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2405.19522v1-abstract-full').style.display = 'inline'; document.getElementById('2405.19522v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2405.19522v1-abstract-full" style="display: none;"> The 2024 Index is our most comprehensive to date and arrives at an important moment when AI's influence on society has never been more pronounced. This year, we have broadened our scope to more extensively cover essential trends such as technical advancements in AI, public perceptions of the technology, and the geopolitical dynamics surrounding its development. Featuring more original data than ever before, this edition introduces new estimates on AI training costs, detailed analyses of the responsible AI landscape, and an entirely new chapter dedicated to AI's impact on science and medicine. 
The AI Index report tracks, collates, distills, and visualizes data related to artificial intelligence (AI). Our mission is to provide unbiased, rigorously vetted, broadly sourced data in order for policymakers, researchers, executives, journalists, and the general public to develop a more thorough and nuanced understanding of the complex field of AI. The AI Index is recognized globally as one of the most credible and authoritative sources for data and insights on artificial intelligence. Previous editions have been cited in major newspapers, including The New York Times, Bloomberg, and The Guardian, have amassed hundreds of academic citations, and been referenced by high-level policymakers in the United States, the United Kingdom, and the European Union, among other places. This year's edition surpasses all previous ones in size, scale, and scope, reflecting the growing significance that AI is coming to hold in all of our lives. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2405.19522v1-abstract-full').style.display = 'none'; document.getElementById('2405.19522v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 29 May, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> May 2024. 
</p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2405.17813">arXiv:2405.17813</a> <span> [<a href="https://arxiv.org/pdf/2405.17813">pdf</a>, <a href="https://arxiv.org/format/2405.17813">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Information Retrieval">cs.IR</span> </div> </div> <p class="title is-5 mathjax"> The Impacts of Data, Ordering, and Intrinsic Dimensionality on Recall in Hierarchical Navigable Small Worlds </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Elliott%2C+O+P">Owen Pendrigh Elliott</a>, <a href="/search/cs?searchtype=author&query=Clark%2C+J">Jesse Clark</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2405.17813v1-abstract-short" style="display: inline;"> Vector search systems, pivotal in AI applications, often rely on the Hierarchical Navigable Small Worlds (HNSW) algorithm. However, the behaviour of HNSW under real-world scenarios using vectors generated with deep learning models remains under-explored. Existing Approximate Nearest Neighbours (ANN) benchmarks and research typically has an over-reliance on simplistic datasets like MNIST or SIFT1M… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2405.17813v1-abstract-full').style.display = 'inline'; document.getElementById('2405.17813v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2405.17813v1-abstract-full" style="display: none;"> Vector search systems, pivotal in AI applications, often rely on the Hierarchical Navigable Small Worlds (HNSW) algorithm. 
However, the behaviour of HNSW under real-world scenarios using vectors generated with deep learning models remains under-explored. Existing Approximate Nearest Neighbours (ANN) benchmarks and research typically has an over-reliance on simplistic datasets like MNIST or SIFT1M and fail to reflect the complexity of current use-cases. Our investigation focuses on HNSW's efficacy across a spectrum of datasets, including synthetic vectors tailored to mimic specific intrinsic dimensionalities, widely-used retrieval benchmarks with popular embedding models, and proprietary e-commerce image data with CLIP models. We survey the most popular HNSW vector databases and collate their default parameters to provide a realistic fixed parameterisation for the duration of the paper. We discover that the recall of approximate HNSW search, in comparison to exact K Nearest Neighbours (KNN) search, is linked to the vector space's intrinsic dimensionality and significantly influenced by the data insertion sequence. Our methodology highlights how insertion order, informed by measurable properties such as the pointwise Local Intrinsic Dimensionality (LID) or known categories, can shift recall by up to 12 percentage points. We also observe that running popular benchmark datasets with HNSW instead of KNN can shift rankings by up to three positions for some models. This work underscores the need for more nuanced benchmarks and design considerations in developing robust vector search systems using approximate vector search algorithms. 
This study presents a number of scenarios with varying real world applicability which aim to better increase understanding and future development of ANN algorithms and embedding <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2405.17813v1-abstract-full').style.display = 'none'; document.getElementById('2405.17813v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 28 May, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> May 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">15 pages, 2 figures</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2405.13964">arXiv:2405.13964</a> <span> [<a href="https://arxiv.org/pdf/2405.13964">pdf</a>, <a href="https://arxiv.org/format/2405.13964">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computational Engineering, Finance, and Science">cs.CE</span> </div> </div> <p class="title is-5 mathjax"> Design Editing for Offline Model-based Optimization </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Yuan%2C+Y">Ye Yuan</a>, <a href="/search/cs?searchtype=author&query=Zhang%2C+Y">Youyuan Zhang</a>, <a href="/search/cs?searchtype=author&query=Chen%2C+C">Can Chen</a>, <a href="/search/cs?searchtype=author&query=Wu%2C+H">Haolun Wu</a>, <a href="/search/cs?searchtype=author&query=Li%2C+Z">Zixuan Li</a>, <a href="/search/cs?searchtype=author&query=Li%2C+J">Jianmo Li</a>, <a 
href="/search/cs?searchtype=author&query=Clark%2C+J+J">James J. Clark</a>, <a href="/search/cs?searchtype=author&query=Liu%2C+X">Xue Liu</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2405.13964v3-abstract-short" style="display: inline;"> Offline model-based optimization (MBO) aims to maximize a black-box objective function using only an offline dataset of designs and scores. These tasks span various domains, such as robotics, material design, and protein and molecular engineering. A common approach involves training a surrogate model using existing designs and their corresponding scores, and then generating new designs through gra… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2405.13964v3-abstract-full').style.display = 'inline'; document.getElementById('2405.13964v3-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2405.13964v3-abstract-full" style="display: none;"> Offline model-based optimization (MBO) aims to maximize a black-box objective function using only an offline dataset of designs and scores. These tasks span various domains, such as robotics, material design, and protein and molecular engineering. A common approach involves training a surrogate model using existing designs and their corresponding scores, and then generating new designs through gradient-based updates with respect to the surrogate model. This method suffers from the out-of-distribution issue, where the surrogate model may erroneously predict high scores for unseen designs. To address this challenge, we introduce a novel method, Design Editing for Offline Model-based Optimization} (DEMO), which leverages a diffusion prior to calibrate overly optimized designs. 
DEMO first generates pseudo design candidates by performing gradient ascent with respect to a surrogate model. Then, an editing process refines these pseudo design candidates by introducing noise and subsequently denoising them with a diffusion prior trained on the offline dataset, ensuring they align with the distribution of valid designs. We provide a theoretical proof that the difference between the final optimized designs generated by DEMO and the prior distribution of the offline dataset is controlled by the noise injected during the editing process. Empirical evaluations on seven offline MBO tasks show that DEMO outperforms various baseline methods, achieving the highest mean rank of 2.1 and a median rank of 1. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2405.13964v3-abstract-full').style.display = 'none'; document.getElementById('2405.13964v3-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 17 August, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 22 May, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> May 2024. 
</p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2405.00670">arXiv:2405.00670</a> <span> [<a href="https://arxiv.org/pdf/2405.00670">pdf</a>, <a href="https://arxiv.org/format/2405.00670">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Image and Video Processing">eess.IV</span> </div> </div> <p class="title is-5 mathjax"> Adapting Pretrained Networks for Image Quality Assessment on High Dynamic Range Displays </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Chubarau%2C+A">Andrei Chubarau</a>, <a href="/search/cs?searchtype=author&query=Yoo%2C+H">Hyunjin Yoo</a>, <a href="/search/cs?searchtype=author&query=Akhavan%2C+T">Tara Akhavan</a>, <a href="/search/cs?searchtype=author&query=Clark%2C+J">James Clark</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2405.00670v1-abstract-short" style="display: inline;"> Conventional image quality metrics (IQMs), such as PSNR and SSIM, are designed for perceptually uniform gamma-encoded pixel values and cannot be directly applied to perceptually non-uniform linear high-dynamic-range (HDR) colors. Similarly, most of the available datasets consist of standard-dynamic-range (SDR) images collected in standard and possibly uncontrolled viewing conditions. 
Popular pre-t… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2405.00670v1-abstract-full').style.display = 'inline'; document.getElementById('2405.00670v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2405.00670v1-abstract-full" style="display: none;"> Conventional image quality metrics (IQMs), such as PSNR and SSIM, are designed for perceptually uniform gamma-encoded pixel values and cannot be directly applied to perceptually non-uniform linear high-dynamic-range (HDR) colors. Similarly, most of the available datasets consist of standard-dynamic-range (SDR) images collected in standard and possibly uncontrolled viewing conditions. Popular pre-trained neural networks are likewise intended for SDR inputs, restricting their direct application to HDR content. On the other hand, training HDR models from scratch is challenging due to limited available HDR data. In this work, we explore more effective approaches for training deep learning-based models for image quality assessment (IQA) on HDR data. We leverage networks pre-trained on SDR data (source domain) and re-target these models to HDR (target domain) with additional fine-tuning and domain adaptation. We validate our methods on the available HDR IQA datasets, demonstrating that models trained with our combined recipe outperform previous baselines, converge much quicker, and reliably generalize to HDR inputs. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2405.00670v1-abstract-full').style.display = 'none'; document.getElementById('2405.00670v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 1 May, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> May 2024. 
</p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">7 pages, 3 figures, 3 tables. Submitted to Human Vision and Electronic Imaging 2024 (HVEI)</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2404.08535">arXiv:2404.08535</a> <span> [<a href="https://arxiv.org/pdf/2404.08535">pdf</a>, <a href="https://arxiv.org/format/2404.08535">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Information Retrieval">cs.IR</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> Generalized Contrastive Learning for Multi-Modal Retrieval and Ranking </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Zhu%2C+T">Tianyu Zhu</a>, <a href="/search/cs?searchtype=author&query=Jung%2C+M+C">Myong Chol Jung</a>, <a href="/search/cs?searchtype=author&query=Clark%2C+J">Jesse Clark</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2404.08535v1-abstract-short" style="display: inline;"> Contrastive learning has gained widespread adoption for retrieval tasks due to its minimal requirement for manual annotations. However, popular contrastive frameworks typically learn from binary relevance, making them ineffective at incorporating direct fine-grained rankings. 
In this paper, we curate a large-scale dataset featuring detailed relevance scores for each query-document pair to facilita… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2404.08535v1-abstract-full').style.display = 'inline'; document.getElementById('2404.08535v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2404.08535v1-abstract-full" style="display: none;"> Contrastive learning has gained widespread adoption for retrieval tasks due to its minimal requirement for manual annotations. However, popular contrastive frameworks typically learn from binary relevance, making them ineffective at incorporating direct fine-grained rankings. In this paper, we curate a large-scale dataset featuring detailed relevance scores for each query-document pair to facilitate future research and evaluation. Subsequently, we propose Generalized Contrastive Learning for Multi-Modal Retrieval and Ranking (GCL), which is designed to learn from fine-grained rankings beyond binary relevance scores. Our results show that GCL achieves a 94.5% increase in NDCG@10 for in-domain and 26.3 to 48.8% increases for cold-start evaluations, all relative to the CLIP baseline and involving ground truth rankings. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2404.08535v1-abstract-full').style.display = 'none'; document.getElementById('2404.08535v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 12 April, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> April 2024. 
</p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2403.15941">arXiv:2403.15941</a> <span> [<a href="https://arxiv.org/pdf/2403.15941">pdf</a>, <a href="https://arxiv.org/format/2403.15941">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Robotics">cs.RO</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> Explore until Confident: Efficient Exploration for Embodied Question Answering </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Ren%2C+A+Z">Allen Z. Ren</a>, <a href="/search/cs?searchtype=author&query=Clark%2C+J">Jaden Clark</a>, <a href="/search/cs?searchtype=author&query=Dixit%2C+A">Anushri Dixit</a>, <a href="/search/cs?searchtype=author&query=Itkina%2C+M">Masha Itkina</a>, <a href="/search/cs?searchtype=author&query=Majumdar%2C+A">Anirudha Majumdar</a>, <a href="/search/cs?searchtype=author&query=Sadigh%2C+D">Dorsa Sadigh</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2403.15941v3-abstract-short" style="display: inline;"> We consider the problem of Embodied Question Answering (EQA), which refers to settings where an embodied agent such as a robot needs to actively explore an environment to gather information until it is confident about the answer to a question. 
In this work, we leverage the strong semantic reasoning capabilities of large vision-language models (VLMs) to efficiently explore and answer such questions… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2403.15941v3-abstract-full').style.display = 'inline'; document.getElementById('2403.15941v3-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2403.15941v3-abstract-full" style="display: none;"> We consider the problem of Embodied Question Answering (EQA), which refers to settings where an embodied agent such as a robot needs to actively explore an environment to gather information until it is confident about the answer to a question. In this work, we leverage the strong semantic reasoning capabilities of large vision-language models (VLMs) to efficiently explore and answer such questions. However, there are two main challenges when using VLMs in EQA: they do not have an internal memory for mapping the scene to be able to plan how to explore over time, and their confidence can be miscalibrated and can cause the robot to prematurely stop exploration or over-explore. We propose a method that first builds a semantic map of the scene based on depth information and via visual prompting of a VLM - leveraging its vast knowledge of relevant regions of the scene for exploration. Next, we use conformal prediction to calibrate the VLM's question answering confidence, allowing the robot to know when to stop exploration - leading to a more calibrated and efficient exploration strategy. To test our framework in simulation, we also contribute a new EQA dataset with diverse, realistic human-robot scenarios and scenes built upon the Habitat-Matterport 3D Research Dataset (HM3D). Both simulated and real robot experiments show our proposed approach improves the performance and efficiency over baselines that do not leverage VLM for exploration or do not calibrate its confidence. 
Webpage with experiment videos and code: https://explore-eqa.github.io/ <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2403.15941v3-abstract-full').style.display = 'none'; document.getElementById('2403.15941v3-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 7 July, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 23 March, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> March 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Robotics: Science and Systems (RSS) 2024</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2403.14056">arXiv:2403.14056</a> <span> [<a href="https://arxiv.org/pdf/2403.14056">pdf</a>, <a href="https://arxiv.org/format/2403.14056">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Robotics">cs.RO</span> </div> </div> <p class="title is-5 mathjax"> Semantics from Space: Satellite-Guided Thermal Semantic Segmentation Annotation for Aerial Field Robots </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Lee%2C+C">Connor Lee</a>, <a href="/search/cs?searchtype=author&query=Soedarmadji%2C+S">Saraswati Soedarmadji</a>, <a href="/search/cs?searchtype=author&query=Anderson%2C+M">Matthew Anderson</a>, <a href="/search/cs?searchtype=author&query=Clark%2C+A+J">Anthony J. 
Clark</a>, <a href="/search/cs?searchtype=author&query=Chung%2C+S">Soon-Jo Chung</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2403.14056v1-abstract-short" style="display: inline;"> We present a new method to automatically generate semantic segmentation annotations for thermal imagery captured from an aerial vehicle by utilizing satellite-derived data products alongside onboard global positioning and attitude estimates. This new capability overcomes the challenge of developing thermal semantic perception algorithms for field robots due to the lack of annotated thermal field d… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2403.14056v1-abstract-full').style.display = 'inline'; document.getElementById('2403.14056v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2403.14056v1-abstract-full" style="display: none;"> We present a new method to automatically generate semantic segmentation annotations for thermal imagery captured from an aerial vehicle by utilizing satellite-derived data products alongside onboard global positioning and attitude estimates. This new capability overcomes the challenge of developing thermal semantic perception algorithms for field robots due to the lack of annotated thermal field datasets and the time and costs of manual annotation, enabling precise and rapid annotation of thermal data from field collection efforts at a massively-parallelizable scale. By incorporating a thermal-conditioned refinement step with visual foundation models, our approach can produce highly-precise semantic segmentation labels using low-resolution satellite land cover data for little-to-no cost. 
It achieves 98.5% of the performance from using costly high-resolution options and demonstrates between 70-160% improvement over popular zero-shot semantic segmentation methods based on large vision-language models currently used for generating annotations for RGB imagery. Code will be available at: https://github.com/connorlee77/aerial-auto-segment. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2403.14056v1-abstract-full').style.display = 'none'; document.getElementById('2403.14056v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 20 March, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> March 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2403.12331">arXiv:2403.12331</a> <span> [<a href="https://arxiv.org/pdf/2403.12331">pdf</a>, <a href="https://arxiv.org/format/2403.12331">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Medical Physics">physics.med-ph</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> Deep Few-view High-resolution Photon-counting Extremity CT at Halved Dose for a Clinical Trial </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Li%2C+M">Mengzhou Li</a>, <a href="/search/cs?searchtype=author&query=Niu%2C+C">Chuang Niu</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+G">Ge Wang</a>, <a href="/search/cs?searchtype=author&query=Amma%2C+M+R">Maya R Amma</a>, <a href="/search/cs?searchtype=author&query=Chapagain%2C+K+M">Krishna M Chapagain</a>, <a 
href="/search/cs?searchtype=author&query=Gabrielson%2C+S">Stefan Gabrielson</a>, <a href="/search/cs?searchtype=author&query=Li%2C+A">Andrew Li</a>, <a href="/search/cs?searchtype=author&query=Jonker%2C+K">Kevin Jonker</a>, <a href="/search/cs?searchtype=author&query=de+Ruiter%2C+N">Niels de Ruiter</a>, <a href="/search/cs?searchtype=author&query=Clark%2C+J+A">Jennifer A Clark</a>, <a href="/search/cs?searchtype=author&query=Butler%2C+P">Phil Butler</a>, <a href="/search/cs?searchtype=author&query=Butler%2C+A">Anthony Butler</a>, <a href="/search/cs?searchtype=author&query=Yu%2C+H">Hengyong Yu</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2403.12331v1-abstract-short" style="display: inline;"> The latest X-ray photon-counting computed tomography (PCCT) for extremity allows multi-energy high-resolution (HR) imaging for tissue characterization and material decomposition. However, both radiation dose and imaging speed need improvement for contrast-enhanced and other studies. Despite the success of deep learning methods for 2D few-view reconstruction, applying them to HR volumetric reconstr… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2403.12331v1-abstract-full').style.display = 'inline'; document.getElementById('2403.12331v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2403.12331v1-abstract-full" style="display: none;"> The latest X-ray photon-counting computed tomography (PCCT) for extremity allows multi-energy high-resolution (HR) imaging for tissue characterization and material decomposition. However, both radiation dose and imaging speed need improvement for contrast-enhanced and other studies. 
Despite the success of deep learning methods for 2D few-view reconstruction, applying them to HR volumetric reconstruction of extremity scans for clinical diagnosis has been limited due to GPU memory constraints, training data scarcity, and domain gap issues. In this paper, we propose a deep learning-based approach for PCCT image reconstruction at halved dose and doubled speed in a New Zealand clinical trial. Particularly, we present a patch-based volumetric refinement network to alleviate the GPU memory limitation, train network with synthetic data, and use model-based iterative refinement to bridge the gap between synthetic and real-world data. The simulation and phantom experiments demonstrate consistently improved results under different acquisition conditions on both in- and off-domain structures using a fixed network. The image quality of 8 patients from the clinical trial are evaluated by three radiologists in comparison with the standard image reconstruction with a full-view dataset. It is shown that our proposed approach is essentially identical to or better than the clinical benchmark in terms of diagnostic image quality scores. Our approach has a great potential to improve the safety and efficiency of PCCT without compromising image quality. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2403.12331v1-abstract-full').style.display = 'none'; document.getElementById('2403.12331v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 18 March, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> March 2024. 
</p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">9 figures, 5 tables</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2403.06269">arXiv:2403.06269</a> <span> [<a href="https://arxiv.org/pdf/2403.06269">pdf</a>, <a href="https://arxiv.org/format/2403.06269">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> FastVideoEdit: Leveraging Consistency Models for Efficient Text-to-Video Editing </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Zhang%2C+Y">Youyuan Zhang</a>, <a href="/search/cs?searchtype=author&query=Ju%2C+X">Xuan Ju</a>, <a href="/search/cs?searchtype=author&query=Clark%2C+J+J">James J. Clark</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2403.06269v2-abstract-short" style="display: inline;"> Diffusion models have demonstrated remarkable capabilities in text-to-image and text-to-video generation, opening up possibilities for video editing based on textual input. However, the computational cost associated with sequential sampling in diffusion models poses challenges for efficient video editing. 
Existing approaches relying on image generation models for video editing suffer from time-con… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2403.06269v2-abstract-full').style.display = 'inline'; document.getElementById('2403.06269v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2403.06269v2-abstract-full" style="display: none;"> Diffusion models have demonstrated remarkable capabilities in text-to-image and text-to-video generation, opening up possibilities for video editing based on textual input. However, the computational cost associated with sequential sampling in diffusion models poses challenges for efficient video editing. Existing approaches relying on image generation models for video editing suffer from time-consuming one-shot fine-tuning, additional condition extraction, or DDIM inversion, making real-time applications impractical. In this work, we propose FastVideoEdit, an efficient zero-shot video editing approach inspired by Consistency Models (CMs). By leveraging the self-consistency property of CMs, we eliminate the need for time-consuming inversion or additional condition extraction, reducing editing time. Our method enables direct mapping from source video to target video with strong preservation ability utilizing a special variance schedule. This results in improved speed advantages, as fewer sampling steps can be used while maintaining comparable generation quality. Experimental results validate the state-of-the-art performance and speed advantages of FastVideoEdit across evaluation metrics encompassing editing speed, temporal consistency, and text-video alignment. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2403.06269v2-abstract-full').style.display = 'none'; document.getElementById('2403.06269v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 18 November, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 10 March, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> March 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Accepted to WACV 2025</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2403.05530">arXiv:2403.05530</a> <span> [<a href="https://arxiv.org/pdf/2403.05530">pdf</a>, <a href="https://arxiv.org/format/2403.05530">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> </div> </div> <p class="title is-5 mathjax"> Gemini 1.5: Unlocking multimodal understanding across millions of tokens of context </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Gemini+Team"> Gemini Team</a>, <a href="/search/cs?searchtype=author&query=Georgiev%2C+P">Petko Georgiev</a>, <a href="/search/cs?searchtype=author&query=Lei%2C+V+I">Ving Ian Lei</a>, <a href="/search/cs?searchtype=author&query=Burnell%2C+R">Ryan Burnell</a>, <a href="/search/cs?searchtype=author&query=Bai%2C+L">Libin Bai</a>, <a href="/search/cs?searchtype=author&query=Gulati%2C+A">Anmol Gulati</a>, <a href="/search/cs?searchtype=author&query=Tanzer%2C+G">Garrett 
Tanzer</a>, <a href="/search/cs?searchtype=author&query=Vincent%2C+D">Damien Vincent</a>, <a href="/search/cs?searchtype=author&query=Pan%2C+Z">Zhufeng Pan</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+S">Shibo Wang</a>, <a href="/search/cs?searchtype=author&query=Mariooryad%2C+S">Soroosh Mariooryad</a>, <a href="/search/cs?searchtype=author&query=Ding%2C+Y">Yifan Ding</a>, <a href="/search/cs?searchtype=author&query=Geng%2C+X">Xinyang Geng</a>, <a href="/search/cs?searchtype=author&query=Alcober%2C+F">Fred Alcober</a>, <a href="/search/cs?searchtype=author&query=Frostig%2C+R">Roy Frostig</a>, <a href="/search/cs?searchtype=author&query=Omernick%2C+M">Mark Omernick</a>, <a href="/search/cs?searchtype=author&query=Walker%2C+L">Lexi Walker</a>, <a href="/search/cs?searchtype=author&query=Paduraru%2C+C">Cosmin Paduraru</a>, <a href="/search/cs?searchtype=author&query=Sorokin%2C+C">Christina Sorokin</a>, <a href="/search/cs?searchtype=author&query=Tacchetti%2C+A">Andrea Tacchetti</a>, <a href="/search/cs?searchtype=author&query=Gaffney%2C+C">Colin Gaffney</a>, <a href="/search/cs?searchtype=author&query=Daruki%2C+S">Samira Daruki</a>, <a href="/search/cs?searchtype=author&query=Sercinoglu%2C+O">Olcan Sercinoglu</a>, <a href="/search/cs?searchtype=author&query=Gleicher%2C+Z">Zach Gleicher</a>, <a href="/search/cs?searchtype=author&query=Love%2C+J">Juliette Love</a> , et al. (1110 additional authors not shown) </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2403.05530v4-abstract-short" style="display: inline;"> In this report, we introduce the Gemini 1.5 family of models, representing the next generation of highly compute-efficient multimodal models capable of recalling and reasoning over fine-grained information from millions of tokens of context, including multiple long documents and hours of video and audio. 
The family includes two new models: (1) an updated Gemini 1.5 Pro, which exceeds the February… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2403.05530v4-abstract-full').style.display = 'inline'; document.getElementById('2403.05530v4-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2403.05530v4-abstract-full" style="display: none;"> In this report, we introduce the Gemini 1.5 family of models, representing the next generation of highly compute-efficient multimodal models capable of recalling and reasoning over fine-grained information from millions of tokens of context, including multiple long documents and hours of video and audio. The family includes two new models: (1) an updated Gemini 1.5 Pro, which exceeds the February version on the great majority of capabilities and benchmarks; (2) Gemini 1.5 Flash, a more lightweight variant designed for efficiency with minimal regression in quality. Gemini 1.5 models achieve near-perfect recall on long-context retrieval tasks across modalities, improve the state-of-the-art in long-document QA, long-video QA and long-context ASR, and match or surpass Gemini 1.0 Ultra's state-of-the-art performance across a broad set of benchmarks. Studying the limits of Gemini 1.5's long-context ability, we find continued improvement in next-token prediction and near-perfect retrieval (>99%) up to at least 10M tokens, a generational leap over existing models such as Claude 3.0 (200k) and GPT-4 Turbo (128k). 
Finally, we highlight real-world use cases, such as Gemini 1.5 collaborating with professionals on completing their tasks achieving 26 to 75% time savings across 10 different job categories, as well as surprising new capabilities of large language models at the frontier; when given a grammar manual for Kalamang, a language with fewer than 200 speakers worldwide, the model learns to translate English to Kalamang at a similar level to a person who learned from the same content. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2403.05530v4-abstract-full').style.display = 'none'; document.getElementById('2403.05530v4-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 8 August, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 8 March, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> March 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2402.17934">arXiv:2402.17934</a> <span> [<a href="https://arxiv.org/pdf/2402.17934">pdf</a>, <a href="https://arxiv.org/format/2402.17934">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> </div> </div> <p class="title is-5 mathjax"> Inducing Generalization across Languages and Tasks using Featurized Low-Rank Mixtures </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Lin%2C+C">Chu-Cheng Lin</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+X">Xinyi Wang</a>, <a href="/search/cs?searchtype=author&query=Clark%2C+J+H">Jonathan H. 
Clark</a>, <a href="/search/cs?searchtype=author&query=Lu%2C+H">Han Lu</a>, <a href="/search/cs?searchtype=author&query=Zhu%2C+Y">Yun Zhu</a>, <a href="/search/cs?searchtype=author&query=Whitehouse%2C+C">Chenxi Whitehouse</a>, <a href="/search/cs?searchtype=author&query=Yu%2C+H">Hongkun Yu</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2402.17934v2-abstract-short" style="display: inline;"> Adapting pretrained large language models (LLMs) to various downstream tasks in tens or hundreds of human languages is computationally expensive. Parameter-efficient fine-tuning (PEFT) significantly reduces the adaptation cost, by tuning only a small amount of parameters. However, common PEFT methods LoRA (Hu et al., 2022) suffer from suboptimal performance on diverse dataset mixtures, due to aggr… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2402.17934v2-abstract-full').style.display = 'inline'; document.getElementById('2402.17934v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2402.17934v2-abstract-full" style="display: none;"> Adapting pretrained large language models (LLMs) to various downstream tasks in tens or hundreds of human languages is computationally expensive. Parameter-efficient fine-tuning (PEFT) significantly reduces the adaptation cost, by tuning only a small amount of parameters. However, common PEFT methods LoRA (Hu et al., 2022) suffer from suboptimal performance on diverse dataset mixtures, due to aggressive parameter tying and negative interference among different datasets. In this work, we propose Featurized Low-rank Mixtures (FLix), a novel PEFT method designed for effective multitask multilingual adaptation. 
FLix associates each unique dataset feature, such as the dataset's language or task, with its own low-rank weight update parameters. By composing feature-specific parameters for each dataset, FLix can accommodate diverse dataset mixtures and generalize better to unseen datasets. Our experiments show that FLix leads to significant improvements over a variety of tasks for both supervised learning and zero-shot settings with gains of up to $14.2$ in exact match points in zero-shot semantic parsing. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2402.17934v2-abstract-full').style.display = 'none'; document.getElementById('2402.17934v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 1 August, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 27 February, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2024. 
</p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Revised version</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2402.01169">arXiv:2402.01169</a> <span> [<a href="https://arxiv.org/pdf/2402.01169">pdf</a>, <a href="https://arxiv.org/ps/2402.01169">ps</a>, <a href="https://arxiv.org/format/2402.01169">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> </div> </div> <p class="title is-5 mathjax"> Faster Inference of Integer SWIN Transformer by Removing the GELU Activation </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Tayaranian%2C+M">Mohammadreza Tayaranian</a>, <a href="/search/cs?searchtype=author&query=Mozafari%2C+S+H">Seyyed Hasan Mozafari</a>, <a href="/search/cs?searchtype=author&query=Clark%2C+J+J">James J. Clark</a>, <a href="/search/cs?searchtype=author&query=Meyer%2C+B">Brett Meyer</a>, <a href="/search/cs?searchtype=author&query=Gross%2C+W">Warren Gross</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2402.01169v1-abstract-short" style="display: inline;"> SWIN transformer is a prominent vision transformer model that has state-of-the-art accuracy in image classification tasks. Despite this success, its unique architecture causes slower inference compared with similar deep neural networks. Integer quantization of the model is one of the methods used to improve its inference latency. 
However, state-of-the-art has not been able to fully quantize the mo… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2402.01169v1-abstract-full').style.display = 'inline'; document.getElementById('2402.01169v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2402.01169v1-abstract-full" style="display: none;"> SWIN transformer is a prominent vision transformer model that has state-of-the-art accuracy in image classification tasks. Despite this success, its unique architecture causes slower inference compared with similar deep neural networks. Integer quantization of the model is one of the methods used to improve its inference latency. However, state-of-the-art has not been able to fully quantize the model. In this work, we improve upon the inference latency of the state-of-the-art methods by removing the floating-point operations, which are associated with the GELU activation in Swin Transformer. While previous work proposed to replace the non-integer operations with linear approximation functions, we propose to replace GELU with ReLU activation. The advantage of ReLU over previous methods is its low memory and computation complexity. We use iterative knowledge distillation to compensate for the lost accuracy due to replacing GELU with ReLU. We quantize our GELU-less SWIN transformer and show that on an RTX 4090 NVIDIA GPU we can improve the inference latency of the quantized SWIN transformer by at least $11\%$ while maintaining an accuracy drop of under $0.5\%$ on the ImageNet evaluation dataset. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2402.01169v1-abstract-full').style.display = 'none'; document.getElementById('2402.01169v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 2 February, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">5 pages, 1 figure. Submitted to Edge Intelligence Workshop III, an AAAI 2024 workshop</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2401.13212">arXiv:2401.13212</a> <span> [<a href="https://arxiv.org/pdf/2401.13212">pdf</a>, <a href="https://arxiv.org/format/2401.13212">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> AdCorDA: Classifier Refinement via Adversarial Correction and Domain Adaptation </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Shen%2C+L">Lulan Shen</a>, <a href="/search/cs?searchtype=author&query=Edalati%2C+A">Ali Edalati</a>, <a href="/search/cs?searchtype=author&query=Meyer%2C+B">Brett Meyer</a>, <a href="/search/cs?searchtype=author&query=Gross%2C+W">Warren Gross</a>, <a href="/search/cs?searchtype=author&query=Clark%2C+J+J">James J. 
Clark</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2401.13212v1-abstract-short" style="display: inline;"> This paper describes a simple yet effective technique for refining a pretrained classifier network. The proposed AdCorDA method is based on modification of the training set and making use of the duality between network weights and layer inputs. We call this input space training. The method consists of two stages - adversarial correction followed by domain adaptation. Adversarial correction uses ad… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2401.13212v1-abstract-full').style.display = 'inline'; document.getElementById('2401.13212v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2401.13212v1-abstract-full" style="display: none;"> This paper describes a simple yet effective technique for refining a pretrained classifier network. The proposed AdCorDA method is based on modification of the training set and making use of the duality between network weights and layer inputs. We call this input space training. The method consists of two stages - adversarial correction followed by domain adaptation. Adversarial correction uses adversarial attacks to correct incorrect training-set classifications. The incorrectly classified samples of the training set are removed and replaced with the adversarially corrected samples to form a new training set, and then, in the second stage, domain adaptation is performed back to the original training set. Extensive experimental validations show significant accuracy boosts of over 5% on the CIFAR-100 dataset. The technique can be straightforwardly applied to refinement of weight-quantized neural networks, where experiments show substantial enhancement in performance over the baseline. 
The adversarial correction technique also results in enhanced robustness to adversarial attacks. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2401.13212v1-abstract-full').style.display = 'none'; document.getElementById('2401.13212v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 23 January, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> January 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2401.12014">arXiv:2401.12014</a> <span> [<a href="https://arxiv.org/pdf/2401.12014">pdf</a>, <a href="https://arxiv.org/format/2401.12014">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> Robustness to distribution shifts of compressed networks for edge devices </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Shen%2C+L">Lulan Shen</a>, <a href="/search/cs?searchtype=author&query=Edalati%2C+A">Ali Edalati</a>, <a href="/search/cs?searchtype=author&query=Meyer%2C+B">Brett Meyer</a>, <a href="/search/cs?searchtype=author&query=Gross%2C+W">Warren Gross</a>, <a href="/search/cs?searchtype=author&query=Clark%2C+J+J">James J. 
Clark</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2401.12014v1-abstract-short" style="display: inline;"> It is necessary to develop efficient DNNs deployed on edge devices with limited computation resources. However, the compressed networks often execute new tasks in the target domain, which is different from the source domain where the original network is trained. It is important to investigate the robustness of compressed networks in two types of data distribution shifts: domain shifts and adversar… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2401.12014v1-abstract-full').style.display = 'inline'; document.getElementById('2401.12014v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2401.12014v1-abstract-full" style="display: none;"> It is necessary to develop efficient DNNs deployed on edge devices with limited computation resources. However, the compressed networks often execute new tasks in the target domain, which is different from the source domain where the original network is trained. It is important to investigate the robustness of compressed networks in two types of data distribution shifts: domain shifts and adversarial perturbations. In this study, we discover that compressed models are less robust to distribution shifts than their original networks. Interestingly, larger networks are more vulnerable to losing robustness than smaller ones, even when they are compressed to a similar size as the smaller networks. Furthermore, compact networks obtained by knowledge distillation are much more robust to distribution shifts than pruned networks. 
Finally, post-training quantization is a reliable method for achieving significant robustness to distribution shifts, and it outperforms both pruned and distilled models in terms of robustness. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2401.12014v1-abstract-full').style.display = 'none'; document.getElementById('2401.12014v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 22 January, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> January 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2401.05566">arXiv:2401.05566</a> <span> [<a href="https://arxiv.org/pdf/2401.05566">pdf</a>, <a href="https://arxiv.org/format/2401.05566">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Cryptography and Security">cs.CR</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Software Engineering">cs.SE</span> </div> </div> <p class="title is-5 mathjax"> Sleeper Agents: Training Deceptive LLMs that Persist Through Safety Training </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Hubinger%2C+E">Evan Hubinger</a>, <a href="/search/cs?searchtype=author&query=Denison%2C+C">Carson Denison</a>, <a href="/search/cs?searchtype=author&query=Mu%2C+J">Jesse Mu</a>, <a href="/search/cs?searchtype=author&query=Lambert%2C+M">Mike 
Lambert</a>, <a href="/search/cs?searchtype=author&query=Tong%2C+M">Meg Tong</a>, <a href="/search/cs?searchtype=author&query=MacDiarmid%2C+M">Monte MacDiarmid</a>, <a href="/search/cs?searchtype=author&query=Lanham%2C+T">Tamera Lanham</a>, <a href="/search/cs?searchtype=author&query=Ziegler%2C+D+M">Daniel M. Ziegler</a>, <a href="/search/cs?searchtype=author&query=Maxwell%2C+T">Tim Maxwell</a>, <a href="/search/cs?searchtype=author&query=Cheng%2C+N">Newton Cheng</a>, <a href="/search/cs?searchtype=author&query=Jermyn%2C+A">Adam Jermyn</a>, <a href="/search/cs?searchtype=author&query=Askell%2C+A">Amanda Askell</a>, <a href="/search/cs?searchtype=author&query=Radhakrishnan%2C+A">Ansh Radhakrishnan</a>, <a href="/search/cs?searchtype=author&query=Anil%2C+C">Cem Anil</a>, <a href="/search/cs?searchtype=author&query=Duvenaud%2C+D">David Duvenaud</a>, <a href="/search/cs?searchtype=author&query=Ganguli%2C+D">Deep Ganguli</a>, <a href="/search/cs?searchtype=author&query=Barez%2C+F">Fazl Barez</a>, <a href="/search/cs?searchtype=author&query=Clark%2C+J">Jack Clark</a>, <a href="/search/cs?searchtype=author&query=Ndousse%2C+K">Kamal Ndousse</a>, <a href="/search/cs?searchtype=author&query=Sachan%2C+K">Kshitij Sachan</a>, <a href="/search/cs?searchtype=author&query=Sellitto%2C+M">Michael Sellitto</a>, <a href="/search/cs?searchtype=author&query=Sharma%2C+M">Mrinank Sharma</a>, <a href="/search/cs?searchtype=author&query=DasSarma%2C+N">Nova DasSarma</a>, <a href="/search/cs?searchtype=author&query=Grosse%2C+R">Roger Grosse</a>, <a href="/search/cs?searchtype=author&query=Kravec%2C+S">Shauna Kravec</a> , et al. 
(14 additional authors not shown) </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2401.05566v3-abstract-short" style="display: inline;"> Humans are capable of strategically deceptive behavior: behaving helpfully in most situations, but then behaving very differently in order to pursue alternative objectives when given the opportunity. If an AI system learned such a deceptive strategy, could we detect it and remove it using current state-of-the-art safety training techniques? To study this question, we construct proof-of-concept exa… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2401.05566v3-abstract-full').style.display = 'inline'; document.getElementById('2401.05566v3-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2401.05566v3-abstract-full" style="display: none;"> Humans are capable of strategically deceptive behavior: behaving helpfully in most situations, but then behaving very differently in order to pursue alternative objectives when given the opportunity. If an AI system learned such a deceptive strategy, could we detect it and remove it using current state-of-the-art safety training techniques? To study this question, we construct proof-of-concept examples of deceptive behavior in large language models (LLMs). For example, we train models that write secure code when the prompt states that the year is 2023, but insert exploitable code when the stated year is 2024. We find that such backdoor behavior can be made persistent, so that it is not removed by standard safety training techniques, including supervised fine-tuning, reinforcement learning, and adversarial training (eliciting unsafe behavior and then training to remove it). 
The backdoor behavior is most persistent in the largest models and in models trained to produce chain-of-thought reasoning about deceiving the training process, with the persistence remaining even when the chain-of-thought is distilled away. Furthermore, rather than removing backdoors, we find that adversarial training can teach models to better recognize their backdoor triggers, effectively hiding the unsafe behavior. Our results suggest that, once a model exhibits deceptive behavior, standard techniques could fail to remove such deception and create a false impression of safety. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2401.05566v3-abstract-full').style.display = 'none'; document.getElementById('2401.05566v3-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 17 January, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 10 January, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> January 2024. 
</p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">updated to add missing acknowledgements</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2312.04416">arXiv:2312.04416</a> <span> [<a href="https://arxiv.org/pdf/2312.04416">pdf</a>, <a href="https://arxiv.org/format/2312.04416">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computers and Society">cs.CY</span> </div> </div> <p class="title is-5 mathjax"> Monitoring Sustainable Global Development Along Shared Socioeconomic Pathways </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Wan%2C+M+W+L">Michelle W. L. Wan</a>, <a href="/search/cs?searchtype=author&query=Clark%2C+J+N">Jeffrey N. Clark</a>, <a href="/search/cs?searchtype=author&query=Small%2C+E+A">Edward A. Small</a>, <a href="/search/cs?searchtype=author&query=Mayoral%2C+E+F">Elena Fillola Mayoral</a>, <a href="/search/cs?searchtype=author&query=Santos-Rodr%C3%ADguez%2C+R">Raúl Santos-Rodríguez</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2312.04416v1-abstract-short" style="display: inline;"> Sustainable global development is one of the most prevalent challenges facing the world today, hinging on the equilibrium between socioeconomic growth and environmental sustainability. We propose approaches to monitor and quantify sustainable development along the Shared Socioeconomic Pathways (SSPs), including mathematically derived scoring algorithms, and machine learning methods. 
These integrat… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2312.04416v1-abstract-full').style.display = 'inline'; document.getElementById('2312.04416v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2312.04416v1-abstract-full" style="display: none;"> Sustainable global development is one of the most prevalent challenges facing the world today, hinging on the equilibrium between socioeconomic growth and environmental sustainability. We propose approaches to monitor and quantify sustainable development along the Shared Socioeconomic Pathways (SSPs), including mathematically derived scoring algorithms, and machine learning methods. These integrate socioeconomic and environmental datasets, to produce an interpretable metric for SSP alignment. An initial study demonstrates promising results, laying the groundwork for the application of different methods to the monitoring of sustainable global development. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2312.04416v1-abstract-full').style.display = 'none'; document.getElementById('2312.04416v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 7 December, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> December 2023. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">5 pages, 1 figure. 
Presented at NeurIPS 2023 Workshop: Tackling Climate Change with Machine Learning</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2310.16941">arXiv:2310.16941</a> <span> [<a href="https://arxiv.org/pdf/2310.16941">pdf</a>, <a href="https://arxiv.org/format/2310.16941">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Robotics">cs.RO</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Multiagent Systems">cs.MA</span> </div> </div> <p class="title is-5 mathjax"> Exploring Behavior Discovery Methods for Heterogeneous Swarms of Limited-Capability Robots </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Mattson%2C+C">Connor Mattson</a>, <a href="/search/cs?searchtype=author&query=Clark%2C+J+C">Jeremy C. Clark</a>, <a href="/search/cs?searchtype=author&query=Brown%2C+D+S">Daniel S. Brown</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2310.16941v1-abstract-short" style="display: inline;"> We study the problem of determining the emergent behaviors that are possible given a functionally heterogeneous swarm of robots with limited capabilities. Prior work has considered behavior search for homogeneous swarms and proposed the use of novelty search over either a hand-specified or learned behavior space followed by clustering to return a taxonomy of emergent behaviors to the user. 
In this… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2310.16941v1-abstract-full').style.display = 'inline'; document.getElementById('2310.16941v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2310.16941v1-abstract-full" style="display: none;"> We study the problem of determining the emergent behaviors that are possible given a functionally heterogeneous swarm of robots with limited capabilities. Prior work has considered behavior search for homogeneous swarms and proposed the use of novelty search over either a hand-specified or learned behavior space followed by clustering to return a taxonomy of emergent behaviors to the user. In this paper, we seek to better understand the role of novelty search and the efficacy of using clustering to discover novel emergent behaviors. Through a large set of experiments and ablations, we analyze the effect of representations, evolutionary search, and various clustering methods in the search for novel behaviors in a heterogeneous swarm. Our results indicate that prior methods fail to discover many interesting behaviors and that an iterative human-in-the-loop discovery process discovers more behaviors than random search, swarm chemistry, and automated behavior discovery. The combined discoveries of our experiments uncover 23 emergent behaviors, 18 of which are novel discoveries. To the best of our knowledge, these are the first known emergent behaviors for heterogeneous swarms of computation-free agents. 
Videos, code, and appendix are available at the project website: https://sites.google.com/view/heterogeneous-bd-methods <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2310.16941v1-abstract-full').style.display = 'none'; document.getElementById('2310.16941v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 25 October, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2023. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">11 pages, 9 figures, To be published in Proceedings IEEE International Symposium on Multi-Robot & Multi-Agent Systems (MRS 2023)</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2310.09440">arXiv:2310.09440</a> <span> [<a href="https://arxiv.org/pdf/2310.09440">pdf</a>, <a href="https://arxiv.org/format/2310.09440">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> Target Variable Engineering </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Clark%2C+J">Jessica Clark</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2310.09440v1-abstract-short" style="display: inline;"> How does the formulation of a target variable affect performance within the ML pipeline? The experiments in this study examine numeric targets that have been binarized by comparing against a threshold. 
We compare the predictive performance of regression models trained to predict the numeric targets vs. classifiers trained to predict their binarized counterparts. Specifically, we make this comparis… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2310.09440v1-abstract-full').style.display = 'inline'; document.getElementById('2310.09440v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2310.09440v1-abstract-full" style="display: none;"> How does the formulation of a target variable affect performance within the ML pipeline? The experiments in this study examine numeric targets that have been binarized by comparing against a threshold. We compare the predictive performance of regression models trained to predict the numeric targets vs. classifiers trained to predict their binarized counterparts. Specifically, we make this comparison at every point of a randomized hyperparameter optimization search to understand the effect of computational resource budget on the tradeoff between the two. We find that regression requires significantly more computational effort to converge upon the optimal performance, and is more sensitive to both randomness and heuristic choices in the training process. Although classification can and does benefit from systematic hyperparameter tuning and model selection, the improvements are much less than for regression. This work comprises the first systematic comparison of regression and classification within the framework of computational resource requirements. Our findings contribute to calls for greater replicability and efficiency within the ML pipeline for the sake of building more sustainable and robust AI systems. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2310.09440v1-abstract-full').style.display = 'none'; document.getElementById('2310.09440v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 13 October, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2023. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2310.03715">arXiv:2310.03715</a> <span> [<a href="https://arxiv.org/pdf/2310.03715">pdf</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computers and Society">cs.CY</span> </div> </div> <p class="title is-5 mathjax"> Artificial Intelligence Index Report 2023 </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Maslej%2C+N">Nestor Maslej</a>, <a href="/search/cs?searchtype=author&query=Fattorini%2C+L">Loredana Fattorini</a>, <a href="/search/cs?searchtype=author&query=Brynjolfsson%2C+E">Erik Brynjolfsson</a>, <a href="/search/cs?searchtype=author&query=Etchemendy%2C+J">John Etchemendy</a>, <a href="/search/cs?searchtype=author&query=Ligett%2C+K">Katrina Ligett</a>, <a href="/search/cs?searchtype=author&query=Lyons%2C+T">Terah Lyons</a>, <a href="/search/cs?searchtype=author&query=Manyika%2C+J">James Manyika</a>, <a href="/search/cs?searchtype=author&query=Ngo%2C+H">Helen Ngo</a>, <a href="/search/cs?searchtype=author&query=Niebles%2C+J+C">Juan Carlos Niebles</a>, <a href="/search/cs?searchtype=author&query=Parli%2C+V">Vanessa Parli</a>, <a href="/search/cs?searchtype=author&query=Shoham%2C+Y">Yoav Shoham</a>, <a 
href="/search/cs?searchtype=author&query=Wald%2C+R">Russell Wald</a>, <a href="/search/cs?searchtype=author&query=Clark%2C+J">Jack Clark</a>, <a href="/search/cs?searchtype=author&query=Perrault%2C+R">Raymond Perrault</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2310.03715v1-abstract-short" style="display: inline;"> Welcome to the sixth edition of the AI Index Report. This year, the report introduces more original data than any previous edition, including a new chapter on AI public opinion, a more thorough technical performance chapter, original analysis about large language and multimodal models, detailed trends in global AI legislation records, a study of the environmental impact of AI systems, and more. Th… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2310.03715v1-abstract-full').style.display = 'inline'; document.getElementById('2310.03715v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2310.03715v1-abstract-full" style="display: none;"> Welcome to the sixth edition of the AI Index Report. This year, the report introduces more original data than any previous edition, including a new chapter on AI public opinion, a more thorough technical performance chapter, original analysis about large language and multimodal models, detailed trends in global AI legislation records, a study of the environmental impact of AI systems, and more. The AI Index Report tracks, collates, distills, and visualizes data related to artificial intelligence. Our mission is to provide unbiased, rigorously vetted, broadly sourced data in order for policymakers, researchers, executives, journalists, and the general public to develop a more thorough and nuanced understanding of the complex field of AI. 
The report aims to be the world's most credible and authoritative source for data and insights about AI. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2310.03715v1-abstract-full').style.display = 'none'; document.getElementById('2310.03715v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 5 October, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2023. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2309.15965">arXiv:2309.15965</a> <span> [<a href="https://arxiv.org/pdf/2309.15965">pdf</a>, <a href="https://arxiv.org/format/2309.15965">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computers and Society">cs.CY</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Metric Geometry">math.MG</span> </div> </div> <p class="title is-5 mathjax"> TraCE: Trajectory Counterfactual Explanation Scores </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Clark%2C+J+N">Jeffrey N. Clark</a>, <a href="/search/cs?searchtype=author&query=Small%2C+E+A">Edward A. Small</a>, <a href="/search/cs?searchtype=author&query=Keshtmand%2C+N">Nawid Keshtmand</a>, <a href="/search/cs?searchtype=author&query=Wan%2C+M+W+L">Michelle W. L. Wan</a>, <a href="/search/cs?searchtype=author&query=Mayoral%2C+E+F">Elena Fillola Mayoral</a>, <a href="/search/cs?searchtype=author&query=Werner%2C+E">Enrico Werner</a>, <a href="/search/cs?searchtype=author&query=Bourdeaux%2C+C+P">Christopher P. 
Bourdeaux</a>, <a href="/search/cs?searchtype=author&query=Santos-Rodriguez%2C+R">Raul Santos-Rodriguez</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2309.15965v2-abstract-short" style="display: inline;"> Counterfactual explanations, and their associated algorithmic recourse, are typically leveraged to understand, explain, and potentially alter a prediction coming from a black-box classifier. In this paper, we propose to extend the use of counterfactuals to evaluate progress in sequential decision making tasks. To this end, we introduce a model-agnostic modular framework, TraCE (Trajectory Counterf… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2309.15965v2-abstract-full').style.display = 'inline'; document.getElementById('2309.15965v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2309.15965v2-abstract-full" style="display: none;"> Counterfactual explanations, and their associated algorithmic recourse, are typically leveraged to understand, explain, and potentially alter a prediction coming from a black-box classifier. In this paper, we propose to extend the use of counterfactuals to evaluate progress in sequential decision making tasks. To this end, we introduce a model-agnostic modular framework, TraCE (Trajectory Counterfactual Explanation) scores, which is able to distill and condense progress in highly complex scenarios into a single value. We demonstrate TraCE's utility across domains by showcasing its main properties in two case studies spanning healthcare and climate change. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2309.15965v2-abstract-full').style.display = 'none'; document.getElementById('2309.15965v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 26 January, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 27 September, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> September 2023. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">10 pages, 4 figures, appendix</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2309.07730">arXiv:2309.07730</a> <span> [<a href="https://arxiv.org/pdf/2309.07730">pdf</a>, <a href="https://arxiv.org/format/2309.07730">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Cryptography and Security">cs.CR</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> </div> </div> <p class="title is-5 mathjax"> AIDPS:Adaptive Intrusion Detection and Prevention System for Underwater Acoustic Sensor Networks </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Das%2C+S">Soumadeep Das</a>, <a href="/search/cs?searchtype=author&query=Pasikhani%2C+A+M">Aryan Mohammadi Pasikhani</a>, <a href="/search/cs?searchtype=author&query=Gope%2C+P">Prosanta Gope</a>, <a href="/search/cs?searchtype=author&query=Clark%2C+J+A">John A. 
Clark</a>, <a href="/search/cs?searchtype=author&query=Patel%2C+C">Chintan Patel</a>, <a href="/search/cs?searchtype=author&query=Sikdar%2C+B">Biplab Sikdar</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2309.07730v1-abstract-short" style="display: inline;"> Underwater Acoustic Sensor Networks (UW-ASNs) are predominantly used for underwater environments and find applications in many areas. However, a lack of security considerations, the unstable and challenging nature of the underwater environment, and the resource-constrained nature of the sensor nodes used for UW-ASNs (which makes them incapable of adopting security primitives) make the UW-ASN prone… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2309.07730v1-abstract-full').style.display = 'inline'; document.getElementById('2309.07730v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2309.07730v1-abstract-full" style="display: none;"> Underwater Acoustic Sensor Networks (UW-ASNs) are predominantly used for underwater environments and find applications in many areas. However, a lack of security considerations, the unstable and challenging nature of the underwater environment, and the resource-constrained nature of the sensor nodes used for UW-ASNs (which makes them incapable of adopting security primitives) make the UW-ASN prone to vulnerabilities. This paper proposes an Adaptive decentralised Intrusion Detection and Prevention System called AIDPS for UW-ASNs. The proposed AIDPS can improve the security of the UW-ASNs so that they can efficiently detect underwater-related attacks (e.g., blackhole, grayhole and flooding attacks). 
To determine the most effective configuration of the proposed construction, we conduct a number of experiments using several state-of-the-art machine learning algorithms (e.g., Adaptive Random Forest (ARF), light gradient-boosting machine, and K-nearest neighbours) and concept drift detection algorithms (e.g., ADWIN, kdqTree, and Page-Hinkley). Our experimental results show that incremental ARF using ADWIN provides optimal performance when implemented with One-class support vector machine (SVM) anomaly-based detectors. Furthermore, our extensive evaluation results also show that the proposed scheme outperforms state-of-the-art bench-marking methods while providing a wider range of desirable features such as scalability and complexity. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2309.07730v1-abstract-full').style.display = 'none'; document.getElementById('2309.07730v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 14 September, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> September 2023. 
</p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2309.04663">arXiv:2309.04663</a> <span> [<a href="https://arxiv.org/pdf/2309.04663">pdf</a>, <a href="https://arxiv.org/format/2309.04663">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> </div> </div> <p class="title is-5 mathjax"> FIAT: Fusing learning paradigms with Instruction-Accelerated Tuning </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Wang%2C+X">Xinyi Wang</a>, <a href="/search/cs?searchtype=author&query=Wieting%2C+J">John Wieting</a>, <a href="/search/cs?searchtype=author&query=Clark%2C+J+H">Jonathan H. Clark</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2309.04663v2-abstract-short" style="display: inline;"> Learning paradigms for large language models (LLMs) currently tend to fall within either in-context learning (ICL) or full fine-tuning. Each of these comes with their own trade-offs based on available data, model size, compute cost, ease-of-use, and final quality with neither solution performing well across-the-board. 
In this article, we first describe ICL and fine-tuning paradigms in a way that h… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2309.04663v2-abstract-full').style.display = 'inline'; document.getElementById('2309.04663v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2309.04663v2-abstract-full" style="display: none;"> Learning paradigms for large language models (LLMs) currently tend to fall within either in-context learning (ICL) or full fine-tuning. Each of these comes with their own trade-offs based on available data, model size, compute cost, ease-of-use, and final quality with neither solution performing well across-the-board. In this article, we first describe ICL and fine-tuning paradigms in a way that highlights their natural connections. Based on these connections, we propose a new learning paradigm called FIAT that fuses the best of these paradigms together, enabling prompt-engineered instructions and chain-of-thought reasoning with the very largest models while also using similar methods to perform parameter updates on a modestly-sized LLM with parameter-efficient tuning. We evaluate FIAT's effectiveness on a variety of multilingual tasks and observe that FIAT performs better than both ICL and fine-tuning at scales ranging from 100-10,000 training examples. We hope that FIAT provides a practical way of harnessing the full potential of LLMs without needing to make a hard choice between learning paradigms. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2309.04663v2-abstract-full').style.display = 'none'; document.getElementById('2309.04663v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 12 September, 2023; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 8 September, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> September 2023. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2309.04211">arXiv:2309.04211</a> <span> [<a href="https://arxiv.org/pdf/2309.04211">pdf</a>, <a href="https://arxiv.org/format/2309.04211">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computers and Society">cs.CY</span> </div> </div> <p class="title is-5 mathjax"> Counterfactual Explanations via Locally-guided Sequential Algorithmic Recourse </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Small%2C+E+A">Edward A. Small</a>, <a href="/search/cs?searchtype=author&query=Clark%2C+J+N">Jeffrey N. Clark</a>, <a href="/search/cs?searchtype=author&query=McWilliams%2C+C+J">Christopher J. McWilliams</a>, <a href="/search/cs?searchtype=author&query=Sokol%2C+K">Kacper Sokol</a>, <a href="/search/cs?searchtype=author&query=Chan%2C+J">Jeffrey Chan</a>, <a href="/search/cs?searchtype=author&query=Salim%2C+F+D">Flora D. 
Salim</a>, <a href="/search/cs?searchtype=author&query=Santos-Rodriguez%2C+R">Raul Santos-Rodriguez</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2309.04211v1-abstract-short" style="display: inline;"> Counterfactuals operationalised through algorithmic recourse have become a powerful tool to make artificial intelligence systems explainable. Conceptually, given an individual classified as y -- the factual -- we seek actions such that their prediction becomes the desired class y' -- the counterfactual. This process offers algorithmic recourse that is (1) easy to customise and interpret, and (2) d… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2309.04211v1-abstract-full').style.display = 'inline'; document.getElementById('2309.04211v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2309.04211v1-abstract-full" style="display: none;"> Counterfactuals operationalised through algorithmic recourse have become a powerful tool to make artificial intelligence systems explainable. Conceptually, given an individual classified as y -- the factual -- we seek actions such that their prediction becomes the desired class y' -- the counterfactual. This process offers algorithmic recourse that is (1) easy to customise and interpret, and (2) directly aligned with the goals of each individual. However, the properties of a "good" counterfactual are still largely debated; it remains an open challenge to effectively locate a counterfactual along with its corresponding recourse. Some strategies use gradient-driven methods, but these offer no guarantees on the feasibility of the recourse and are open to adversarial attacks on carefully created manifolds. This can lead to unfairness and lack of robustness. 
Other methods are data-driven, which mostly addresses the feasibility problem at the expense of privacy, security and secrecy as they require access to the entire training data set. Here, we introduce LocalFACE, a model-agnostic technique that composes feasible and actionable counterfactual explanations using locally-acquired information at each step of the algorithmic recourse. Our explainer preserves the privacy of users by only leveraging data that it specifically requires to construct actionable algorithmic recourse, and protects the model by offering transparency solely in the regions deemed necessary for the intervention. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2309.04211v1-abstract-full').style.display = 'none'; document.getElementById('2309.04211v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 8 September, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> September 2023. 
</p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">7 pages, 5 figures, 3 appendix pages</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2308.07286">arXiv:2308.07286</a> <span> [<a href="https://arxiv.org/pdf/2308.07286">pdf</a>, <a href="https://arxiv.org/format/2308.07286">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> The Devil is in the Errors: Leveraging Large Language Models for Fine-grained Machine Translation Evaluation </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Fernandes%2C+P">Patrick Fernandes</a>, <a href="/search/cs?searchtype=author&query=Deutsch%2C+D">Daniel Deutsch</a>, <a href="/search/cs?searchtype=author&query=Finkelstein%2C+M">Mara Finkelstein</a>, <a href="/search/cs?searchtype=author&query=Riley%2C+P">Parker Riley</a>, <a href="/search/cs?searchtype=author&query=Martins%2C+A+F+T">André F. T. Martins</a>, <a href="/search/cs?searchtype=author&query=Neubig%2C+G">Graham Neubig</a>, <a href="/search/cs?searchtype=author&query=Garg%2C+A">Ankush Garg</a>, <a href="/search/cs?searchtype=author&query=Clark%2C+J+H">Jonathan H. 
Clark</a>, <a href="/search/cs?searchtype=author&query=Freitag%2C+M">Markus Freitag</a>, <a href="/search/cs?searchtype=author&query=Firat%2C+O">Orhan Firat</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2308.07286v1-abstract-short" style="display: inline;"> Automatic evaluation of machine translation (MT) is a critical tool driving the rapid iterative development of MT systems. While considerable progress has been made on estimating a single scalar quality score, current metrics lack the informativeness of more detailed schemes that annotate individual errors, such as Multidimensional Quality Metrics (MQM). In this paper, we help fill this gap by pro… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2308.07286v1-abstract-full').style.display = 'inline'; document.getElementById('2308.07286v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2308.07286v1-abstract-full" style="display: none;"> Automatic evaluation of machine translation (MT) is a critical tool driving the rapid iterative development of MT systems. While considerable progress has been made on estimating a single scalar quality score, current metrics lack the informativeness of more detailed schemes that annotate individual errors, such as Multidimensional Quality Metrics (MQM). In this paper, we help fill this gap by proposing AutoMQM, a prompting technique which leverages the reasoning and in-context learning capabilities of large language models (LLMs) and asks them to identify and categorize errors in translations. We start by evaluating recent LLMs, such as PaLM and PaLM-2, through simple score prediction prompting, and we study the impact of labeled data through in-context learning and finetuning. 
We then evaluate AutoMQM with PaLM-2 models, and we find that it improves performance compared to just prompting for scores (with particularly large gains for larger models) while providing interpretability through error spans that align with human annotations. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2308.07286v1-abstract-full').style.display = 'none'; document.getElementById('2308.07286v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 14 August, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> August 2023. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">19 pages</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2307.08590">arXiv:2307.08590</a> <span> [<a href="https://arxiv.org/pdf/2307.08590">pdf</a>, <a href="https://arxiv.org/format/2307.08590">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Human-Computer Interaction">cs.HC</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Robotics">cs.RO</span> </div> </div> <p class="title is-5 mathjax"> The Effect of Data Visualisation Quality and Task Density on Human-Swarm Interaction </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Abioye%2C+A+O">Ayodeji O. 
Abioye</a>, <a href="/search/cs?searchtype=author&query=Naiseh%2C+M">Mohammad Naiseh</a>, <a href="/search/cs?searchtype=author&query=Hunt%2C+W">William Hunt</a>, <a href="/search/cs?searchtype=author&query=Clark%2C+J">Jediah Clark</a>, <a href="/search/cs?searchtype=author&query=Ramchurn%2C+S+D">Sarvapali D. Ramchurn</a>, <a href="/search/cs?searchtype=author&query=Soorati%2C+M+D">Mohammad D. Soorati</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2307.08590v1-abstract-short" style="display: inline;"> Despite the advantages of having robot swarms, human supervision is required for real-world applications. The performance of the human-swarm system depends on several factors including the data availability for the human operators. In this paper, we study the human factors aspect of the human-swarm interaction and investigate how having access to high-quality data can affect the performance of the… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2307.08590v1-abstract-full').style.display = 'inline'; document.getElementById('2307.08590v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2307.08590v1-abstract-full" style="display: none;"> Despite the advantages of having robot swarms, human supervision is required for real-world applications. The performance of the human-swarm system depends on several factors including the data availability for the human operators. In this paper, we study the human factors aspect of the human-swarm interaction and investigate how having access to high-quality data can affect the performance of the human-swarm system - the number of tasks completed and the human trust level in operation. 
We designed an experiment where a human operator is tasked to operate a swarm to identify casualties in an area within a given time period. One group of operators had the option to request high-quality pictures while the other group had to base their decision on the available low-quality images. We performed a user study with 120 participants and recorded their success rate (directly logged via the simulation platform) as well as their workload and trust level (measured through a questionnaire after completing a human-swarm scenario). The findings from our study indicated that the group granted access to high-quality data exhibited an increased workload and placed greater trust in the swarm, thus confirming our initial hypothesis. However, we also found that the number of accurately identified casualties did not significantly vary between the two groups, suggesting that data quality had no impact on the successful completion of tasks. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2307.08590v1-abstract-full').style.display = 'none'; document.getElementById('2307.08590v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 17 July, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> July 2023. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Paper accepted for presentation at the IEEE RO-MAN 2023 Conference and would be published as part of the conference proceedings. 8 pages. 5 figures. 
3 tables</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2306.16388">arXiv:2306.16388</a> <span> [<a href="https://arxiv.org/pdf/2306.16388">pdf</a>, <a href="https://arxiv.org/format/2306.16388">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> </div> </div> <p class="title is-5 mathjax"> Towards Measuring the Representation of Subjective Global Opinions in Language Models </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Durmus%2C+E">Esin Durmus</a>, <a href="/search/cs?searchtype=author&query=Nguyen%2C+K">Karina Nguyen</a>, <a href="/search/cs?searchtype=author&query=Liao%2C+T+I">Thomas I. Liao</a>, <a href="/search/cs?searchtype=author&query=Schiefer%2C+N">Nicholas Schiefer</a>, <a href="/search/cs?searchtype=author&query=Askell%2C+A">Amanda Askell</a>, <a href="/search/cs?searchtype=author&query=Bakhtin%2C+A">Anton Bakhtin</a>, <a href="/search/cs?searchtype=author&query=Chen%2C+C">Carol Chen</a>, <a href="/search/cs?searchtype=author&query=Hatfield-Dodds%2C+Z">Zac Hatfield-Dodds</a>, <a href="/search/cs?searchtype=author&query=Hernandez%2C+D">Danny Hernandez</a>, <a href="/search/cs?searchtype=author&query=Joseph%2C+N">Nicholas Joseph</a>, <a href="/search/cs?searchtype=author&query=Lovitt%2C+L">Liane Lovitt</a>, <a href="/search/cs?searchtype=author&query=McCandlish%2C+S">Sam McCandlish</a>, <a href="/search/cs?searchtype=author&query=Sikder%2C+O">Orowa Sikder</a>, <a href="/search/cs?searchtype=author&query=Tamkin%2C+A">Alex Tamkin</a>, <a href="/search/cs?searchtype=author&query=Thamkul%2C+J">Janel Thamkul</a>, <a href="/search/cs?searchtype=author&query=Kaplan%2C+J">Jared 
Kaplan</a>, <a href="/search/cs?searchtype=author&query=Clark%2C+J">Jack Clark</a>, <a href="/search/cs?searchtype=author&query=Ganguli%2C+D">Deep Ganguli</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2306.16388v2-abstract-short" style="display: inline;"> Large language models (LLMs) may not equitably represent diverse global perspectives on societal issues. In this paper, we develop a quantitative framework to evaluate whose opinions model-generated responses are more similar to. We first build a dataset, GlobalOpinionQA, comprised of questions and answers from cross-national surveys designed to capture diverse opinions on global issues across dif… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2306.16388v2-abstract-full').style.display = 'inline'; document.getElementById('2306.16388v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2306.16388v2-abstract-full" style="display: none;"> Large language models (LLMs) may not equitably represent diverse global perspectives on societal issues. In this paper, we develop a quantitative framework to evaluate whose opinions model-generated responses are more similar to. We first build a dataset, GlobalOpinionQA, comprised of questions and answers from cross-national surveys designed to capture diverse opinions on global issues across different countries. Next, we define a metric that quantifies the similarity between LLM-generated survey responses and human responses, conditioned on country. With our framework, we run three experiments on an LLM trained to be helpful, honest, and harmless with Constitutional AI. 
By default, LLM responses tend to be more similar to the opinions of certain populations, such as those from the USA, and some European and South American countries, highlighting the potential for biases. When we prompt the model to consider a particular country's perspective, responses shift to be more similar to the opinions of the prompted populations, but can reflect harmful cultural stereotypes. When we translate GlobalOpinionQA questions to a target language, the model's responses do not necessarily become the most similar to the opinions of speakers of those languages. We release our dataset for others to use and build on. Our data is at https://huggingface.co/datasets/Anthropic/llm_global_opinions. We also provide an interactive visualization at https://llmglobalvalues.anthropic.com. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2306.16388v2-abstract-full').style.display = 'none'; document.getElementById('2306.16388v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 11 April, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 28 June, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> June 2023. 
</p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2305.15324">arXiv:2305.15324</a> <span> [<a href="https://arxiv.org/pdf/2305.15324">pdf</a>, <a href="https://arxiv.org/format/2305.15324">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> </div> </div> <p class="title is-5 mathjax"> Model evaluation for extreme risks </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Shevlane%2C+T">Toby Shevlane</a>, <a href="/search/cs?searchtype=author&query=Farquhar%2C+S">Sebastian Farquhar</a>, <a href="/search/cs?searchtype=author&query=Garfinkel%2C+B">Ben Garfinkel</a>, <a href="/search/cs?searchtype=author&query=Phuong%2C+M">Mary Phuong</a>, <a href="/search/cs?searchtype=author&query=Whittlestone%2C+J">Jess Whittlestone</a>, <a href="/search/cs?searchtype=author&query=Leung%2C+J">Jade Leung</a>, <a href="/search/cs?searchtype=author&query=Kokotajlo%2C+D">Daniel Kokotajlo</a>, <a href="/search/cs?searchtype=author&query=Marchal%2C+N">Nahema Marchal</a>, <a href="/search/cs?searchtype=author&query=Anderljung%2C+M">Markus Anderljung</a>, <a href="/search/cs?searchtype=author&query=Kolt%2C+N">Noam Kolt</a>, <a href="/search/cs?searchtype=author&query=Ho%2C+L">Lewis Ho</a>, <a href="/search/cs?searchtype=author&query=Siddarth%2C+D">Divya Siddarth</a>, <a href="/search/cs?searchtype=author&query=Avin%2C+S">Shahar Avin</a>, <a href="/search/cs?searchtype=author&query=Hawkins%2C+W">Will Hawkins</a>, <a href="/search/cs?searchtype=author&query=Kim%2C+B">Been Kim</a>, <a href="/search/cs?searchtype=author&query=Gabriel%2C+I">Iason Gabriel</a>, <a href="/search/cs?searchtype=author&query=Bolina%2C+V">Vijay Bolina</a>, <a href="/search/cs?searchtype=author&query=Clark%2C+J">Jack Clark</a>, <a 
href="/search/cs?searchtype=author&query=Bengio%2C+Y">Yoshua Bengio</a>, <a href="/search/cs?searchtype=author&query=Christiano%2C+P">Paul Christiano</a>, <a href="/search/cs?searchtype=author&query=Dafoe%2C+A">Allan Dafoe</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2305.15324v2-abstract-short" style="display: inline;"> Current approaches to building general-purpose AI systems tend to produce systems with both beneficial and harmful capabilities. Further progress in AI development could lead to capabilities that pose extreme risks, such as offensive cyber capabilities or strong manipulation skills. We explain why model evaluation is critical for addressing extreme risks. Developers must be able to identify danger… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2305.15324v2-abstract-full').style.display = 'inline'; document.getElementById('2305.15324v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2305.15324v2-abstract-full" style="display: none;"> Current approaches to building general-purpose AI systems tend to produce systems with both beneficial and harmful capabilities. Further progress in AI development could lead to capabilities that pose extreme risks, such as offensive cyber capabilities or strong manipulation skills. We explain why model evaluation is critical for addressing extreme risks. Developers must be able to identify dangerous capabilities (through "dangerous capability evaluations") and the propensity of models to apply their capabilities for harm (through "alignment evaluations"). These evaluations will become critical for keeping policymakers and other stakeholders informed, and for making responsible decisions about model training, deployment, and security. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2305.15324v2-abstract-full').style.display = 'none'; document.getElementById('2305.15324v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 22 September, 2023; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 24 May, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> May 2023. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Fixed typos; added citation</span> </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">ACM Class:</span> K.4.1 </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2305.14332">arXiv:2305.14332</a> <span> [<a href="https://arxiv.org/pdf/2305.14332">pdf</a>, <a href="https://arxiv.org/format/2305.14332">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> </div> </div> <p class="title is-5 mathjax"> Evaluating and Modeling Attribution for Cross-Lingual Question Answering </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Muller%2C+B">Benjamin Muller</a>, <a href="/search/cs?searchtype=author&query=Wieting%2C+J">John Wieting</a>, <a href="/search/cs?searchtype=author&query=Clark%2C+J+H">Jonathan H. 
Clark</a>, <a href="/search/cs?searchtype=author&query=Kwiatkowski%2C+T">Tom Kwiatkowski</a>, <a href="/search/cs?searchtype=author&query=Ruder%2C+S">Sebastian Ruder</a>, <a href="/search/cs?searchtype=author&query=Soares%2C+L+B">Livio Baldini Soares</a>, <a href="/search/cs?searchtype=author&query=Aharoni%2C+R">Roee Aharoni</a>, <a href="/search/cs?searchtype=author&query=Herzig%2C+J">Jonathan Herzig</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+X">Xinyi Wang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2305.14332v2-abstract-short" style="display: inline;"> Trustworthy answer content is abundant in many high-resource languages and is instantly accessible through question answering systems, yet this content can be hard to access for those that do not speak these languages. The leap forward in cross-lingual modeling quality offered by generative language models offers much promise, yet their raw generations often fall short in factuality. To improve tr… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2305.14332v2-abstract-full').style.display = 'inline'; document.getElementById('2305.14332v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2305.14332v2-abstract-full" style="display: none;"> Trustworthy answer content is abundant in many high-resource languages and is instantly accessible through question answering systems, yet this content can be hard to access for those that do not speak these languages. The leap forward in cross-lingual modeling quality offered by generative language models offers much promise, yet their raw generations often fall short in factuality. 
To improve trustworthiness in these systems, a promising direction is to attribute the answer to a retrieved source, possibly in a content-rich language different from the query. Our work is the first to study attribution for cross-lingual question answering. First, we collect data in 5 languages to assess the attribution level of a state-of-the-art cross-lingual QA system. To our surprise, we find that a substantial portion of the answers is not attributable to any retrieved passages (up to 50% of answers exactly matching a gold reference) despite the system being able to attend directly to the retrieved text. Second, to address this poor attribution level, we experiment with a wide range of attribution detection techniques. We find that Natural Language Inference models and PaLM 2 fine-tuned on a very small amount of attribution data can accurately detect attribution. Based on these models, we improve the attribution level of a cross-lingual question-answering system. Overall, we show that current academic generative cross-lingual QA systems have substantial shortcomings in attribution and we build tooling to mitigate these issues. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2305.14332v2-abstract-full').style.display = 'none'; document.getElementById('2305.14332v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 15 November, 2023; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 23 May, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> May 2023. 
</p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Published as a long paper at EMNLP 2023</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2305.11938">arXiv:2305.11938</a> <span> [<a href="https://arxiv.org/pdf/2305.11938">pdf</a>, <a href="https://arxiv.org/format/2305.11938">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> </div> <div class="is-inline-block" style="margin-left: 0.5rem"> <div class="tags has-addons"> <span class="tag is-dark is-size-7">doi</span> <span class="tag is-light is-size-7"><a class="" href="https://doi.org/10.18653/v1/2023.findings-emnlp.125">10.18653/v1/2023.findings-emnlp.125 <i class="fa fa-external-link" aria-hidden="true"></i></a></span> </div> </div> </div> <p class="title is-5 mathjax"> XTREME-UP: A User-Centric Scarce-Data Benchmark for Under-Represented Languages </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Ruder%2C+S">Sebastian Ruder</a>, <a href="/search/cs?searchtype=author&query=Clark%2C+J+H">Jonathan H. Clark</a>, <a href="/search/cs?searchtype=author&query=Gutkin%2C+A">Alexander Gutkin</a>, <a href="/search/cs?searchtype=author&query=Kale%2C+M">Mihir Kale</a>, <a href="/search/cs?searchtype=author&query=Ma%2C+M">Min Ma</a>, <a href="/search/cs?searchtype=author&query=Nicosia%2C+M">Massimo Nicosia</a>, <a href="/search/cs?searchtype=author&query=Rijhwani%2C+S">Shruti Rijhwani</a>, <a href="/search/cs?searchtype=author&query=Riley%2C+P">Parker Riley</a>, <a href="/search/cs?searchtype=author&query=Sarr%2C+J+A">Jean-Michel A. 
Sarr</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+X">Xinyi Wang</a>, <a href="/search/cs?searchtype=author&query=Wieting%2C+J">John Wieting</a>, <a href="/search/cs?searchtype=author&query=Gupta%2C+N">Nitish Gupta</a>, <a href="/search/cs?searchtype=author&query=Katanova%2C+A">Anna Katanova</a>, <a href="/search/cs?searchtype=author&query=Kirov%2C+C">Christo Kirov</a>, <a href="/search/cs?searchtype=author&query=Dickinson%2C+D+L">Dana L. Dickinson</a>, <a href="/search/cs?searchtype=author&query=Roark%2C+B">Brian Roark</a>, <a href="/search/cs?searchtype=author&query=Samanta%2C+B">Bidisha Samanta</a>, <a href="/search/cs?searchtype=author&query=Tao%2C+C">Connie Tao</a>, <a href="/search/cs?searchtype=author&query=Adelani%2C+D+I">David I. Adelani</a>, <a href="/search/cs?searchtype=author&query=Axelrod%2C+V">Vera Axelrod</a>, <a href="/search/cs?searchtype=author&query=Caswell%2C+I">Isaac Caswell</a>, <a href="/search/cs?searchtype=author&query=Cherry%2C+C">Colin Cherry</a>, <a href="/search/cs?searchtype=author&query=Garrette%2C+D">Dan Garrette</a>, <a href="/search/cs?searchtype=author&query=Ingle%2C+R">Reeve Ingle</a>, <a href="/search/cs?searchtype=author&query=Johnson%2C+M">Melvin Johnson</a> , et al. (2 additional authors not shown) </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2305.11938v2-abstract-short" style="display: inline;"> Data scarcity is a crucial issue for the development of highly multilingual NLP systems. Yet for many under-represented languages (ULs) -- languages for which NLP research is particularly far behind in meeting user needs -- it is feasible to annotate small amounts of data. 
Motivated by this, we propose XTREME-UP, a benchmark defined by: its focus on the scarce-data scenario rather than zero-shot;… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2305.11938v2-abstract-full').style.display = 'inline'; document.getElementById('2305.11938v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2305.11938v2-abstract-full" style="display: none;"> Data scarcity is a crucial issue for the development of highly multilingual NLP systems. Yet for many under-represented languages (ULs) -- languages for which NLP research is particularly far behind in meeting user needs -- it is feasible to annotate small amounts of data. Motivated by this, we propose XTREME-UP, a benchmark defined by: its focus on the scarce-data scenario rather than zero-shot; its focus on user-centric tasks -- tasks with broad adoption by speakers of high-resource languages; and its focus on under-represented languages where this scarce-data scenario tends to be most realistic. XTREME-UP evaluates the capabilities of language models across 88 under-represented languages over 9 key user-centric technologies including ASR, OCR, MT, and information access tasks that are of general utility. We create new datasets for OCR, autocomplete, semantic parsing, and transliteration, and build on and refine existing datasets for other tasks. XTREME-UP provides methodology for evaluating many modeling scenarios including text-only, multi-modal (vision, audio, and text), supervised parameter tuning, and in-context learning. We evaluate commonly used models on the benchmark. 
We release all code and scripts to train and evaluate models <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2305.11938v2-abstract-full').style.display = 'none'; document.getElementById('2305.11938v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 24 May, 2023; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 19 May, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> May 2023. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2305.10403">arXiv:2305.10403</a> <span> [<a href="https://arxiv.org/pdf/2305.10403">pdf</a>, <a href="https://arxiv.org/format/2305.10403">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> </div> </div> <p class="title is-5 mathjax"> PaLM 2 Technical Report </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Anil%2C+R">Rohan Anil</a>, <a href="/search/cs?searchtype=author&query=Dai%2C+A+M">Andrew M. 
Dai</a>, <a href="/search/cs?searchtype=author&query=Firat%2C+O">Orhan Firat</a>, <a href="/search/cs?searchtype=author&query=Johnson%2C+M">Melvin Johnson</a>, <a href="/search/cs?searchtype=author&query=Lepikhin%2C+D">Dmitry Lepikhin</a>, <a href="/search/cs?searchtype=author&query=Passos%2C+A">Alexandre Passos</a>, <a href="/search/cs?searchtype=author&query=Shakeri%2C+S">Siamak Shakeri</a>, <a href="/search/cs?searchtype=author&query=Taropa%2C+E">Emanuel Taropa</a>, <a href="/search/cs?searchtype=author&query=Bailey%2C+P">Paige Bailey</a>, <a href="/search/cs?searchtype=author&query=Chen%2C+Z">Zhifeng Chen</a>, <a href="/search/cs?searchtype=author&query=Chu%2C+E">Eric Chu</a>, <a href="/search/cs?searchtype=author&query=Clark%2C+J+H">Jonathan H. Clark</a>, <a href="/search/cs?searchtype=author&query=Shafey%2C+L+E">Laurent El Shafey</a>, <a href="/search/cs?searchtype=author&query=Huang%2C+Y">Yanping Huang</a>, <a href="/search/cs?searchtype=author&query=Meier-Hellstern%2C+K">Kathy Meier-Hellstern</a>, <a href="/search/cs?searchtype=author&query=Mishra%2C+G">Gaurav Mishra</a>, <a href="/search/cs?searchtype=author&query=Moreira%2C+E">Erica Moreira</a>, <a href="/search/cs?searchtype=author&query=Omernick%2C+M">Mark Omernick</a>, <a href="/search/cs?searchtype=author&query=Robinson%2C+K">Kevin Robinson</a>, <a href="/search/cs?searchtype=author&query=Ruder%2C+S">Sebastian Ruder</a>, <a href="/search/cs?searchtype=author&query=Tay%2C+Y">Yi Tay</a>, <a href="/search/cs?searchtype=author&query=Xiao%2C+K">Kefan Xiao</a>, <a href="/search/cs?searchtype=author&query=Xu%2C+Y">Yuanzhong Xu</a>, <a href="/search/cs?searchtype=author&query=Zhang%2C+Y">Yujing Zhang</a>, <a href="/search/cs?searchtype=author&query=Abrego%2C+G+H">Gustavo Hernandez Abrego</a> , et al. 
(103 additional authors not shown) </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2305.10403v3-abstract-short" style="display: inline;"> We introduce PaLM 2, a new state-of-the-art language model that has better multilingual and reasoning capabilities and is more compute-efficient than its predecessor PaLM. PaLM 2 is a Transformer-based model trained using a mixture of objectives. Through extensive evaluations on English and multilingual language, and reasoning tasks, we demonstrate that PaLM 2 has significantly improved quality on… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2305.10403v3-abstract-full').style.display = 'inline'; document.getElementById('2305.10403v3-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2305.10403v3-abstract-full" style="display: none;"> We introduce PaLM 2, a new state-of-the-art language model that has better multilingual and reasoning capabilities and is more compute-efficient than its predecessor PaLM. PaLM 2 is a Transformer-based model trained using a mixture of objectives. Through extensive evaluations on English and multilingual language, and reasoning tasks, we demonstrate that PaLM 2 has significantly improved quality on downstream tasks across different model sizes, while simultaneously exhibiting faster and more efficient inference compared to PaLM. This improved efficiency enables broader deployment while also allowing the model to respond faster, for a more natural pace of interaction. PaLM 2 demonstrates robust reasoning capabilities exemplified by large improvements over PaLM on BIG-Bench and other reasoning tasks. 
PaLM 2 exhibits stable performance on a suite of responsible AI evaluations, and enables inference-time control over toxicity without additional overhead or impact on other capabilities. Overall, PaLM 2 achieves state-of-the-art performance across a diverse set of tasks and capabilities. When discussing the PaLM 2 family, it is important to distinguish between pre-trained models (of various sizes), fine-tuned variants of these models, and the user-facing products that use these models. In particular, user-facing products typically include additional pre- and post-processing steps. Additionally, the underlying models may evolve over time. Therefore, one should not expect the performance of user-facing products to exactly match the results reported in this report. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2305.10403v3-abstract-full').style.display = 'none'; document.getElementById('2305.10403v3-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 13 September, 2023; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 17 May, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> May 2023. 
</p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2305.06897">arXiv:2305.06897</a> <span> [<a href="https://arxiv.org/pdf/2305.06897">pdf</a>, <a href="https://arxiv.org/format/2305.06897">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Information Retrieval">cs.IR</span> </div> </div> <p class="title is-5 mathjax"> AfriQA: Cross-lingual Open-Retrieval Question Answering for African Languages </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Ogundepo%2C+O">Odunayo Ogundepo</a>, <a href="/search/cs?searchtype=author&query=Gwadabe%2C+T+R">Tajuddeen R. Gwadabe</a>, <a href="/search/cs?searchtype=author&query=Rivera%2C+C+E">Clara E. Rivera</a>, <a href="/search/cs?searchtype=author&query=Clark%2C+J+H">Jonathan H. Clark</a>, <a href="/search/cs?searchtype=author&query=Ruder%2C+S">Sebastian Ruder</a>, <a href="/search/cs?searchtype=author&query=Adelani%2C+D+I">David Ifeoluwa Adelani</a>, <a href="/search/cs?searchtype=author&query=Dossou%2C+B+F+P">Bonaventure F. P. 
Dossou</a>, <a href="/search/cs?searchtype=author&query=DIOP%2C+A+A">Abdou Aziz DIOP</a>, <a href="/search/cs?searchtype=author&query=Sikasote%2C+C">Claytone Sikasote</a>, <a href="/search/cs?searchtype=author&query=Hacheme%2C+G">Gilles Hacheme</a>, <a href="/search/cs?searchtype=author&query=Buzaaba%2C+H">Happy Buzaaba</a>, <a href="/search/cs?searchtype=author&query=Ezeani%2C+I">Ignatius Ezeani</a>, <a href="/search/cs?searchtype=author&query=Mabuya%2C+R">Rooweither Mabuya</a>, <a href="/search/cs?searchtype=author&query=Osei%2C+S">Salomey Osei</a>, <a href="/search/cs?searchtype=author&query=Emezue%2C+C">Chris Emezue</a>, <a href="/search/cs?searchtype=author&query=Kahira%2C+A+N">Albert Njoroge Kahira</a>, <a href="/search/cs?searchtype=author&query=Muhammad%2C+S+H">Shamsuddeen H. Muhammad</a>, <a href="/search/cs?searchtype=author&query=Oladipo%2C+A">Akintunde Oladipo</a>, <a href="/search/cs?searchtype=author&query=Owodunni%2C+A+T">Abraham Toluwase Owodunni</a>, <a href="/search/cs?searchtype=author&query=Tonja%2C+A+L">Atnafu Lambebo Tonja</a>, <a href="/search/cs?searchtype=author&query=Shode%2C+I">Iyanuoluwa Shode</a>, <a href="/search/cs?searchtype=author&query=Asai%2C+A">Akari Asai</a>, <a href="/search/cs?searchtype=author&query=Ajayi%2C+T+O">Tunde Oluwaseyi Ajayi</a>, <a href="/search/cs?searchtype=author&query=Siro%2C+C">Clemencia Siro</a>, <a href="/search/cs?searchtype=author&query=Arthur%2C+S">Steven Arthur</a> , et al. (27 additional authors not shown) </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2305.06897v1-abstract-short" style="display: inline;"> African languages have far less in-language content available digitally, making it challenging for question answering systems to satisfy the information needs of users. 
Cross-lingual open-retrieval question answering (XOR QA) systems -- those that retrieve answer content from other languages while serving people in their native language -- offer a means of filling this gap. To this end, we create… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2305.06897v1-abstract-full').style.display = 'inline'; document.getElementById('2305.06897v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2305.06897v1-abstract-full" style="display: none;"> African languages have far less in-language content available digitally, making it challenging for question answering systems to satisfy the information needs of users. Cross-lingual open-retrieval question answering (XOR QA) systems -- those that retrieve answer content from other languages while serving people in their native language -- offer a means of filling this gap. To this end, we create AfriQA, the first cross-lingual QA dataset with a focus on African languages. AfriQA includes 12,000+ XOR QA examples across 10 African languages. While previous datasets have focused primarily on languages where cross-lingual QA augments coverage from the target language, AfriQA focuses on languages where cross-lingual answer content is the only high-coverage source of answer content. Because of this, we argue that African languages are one of the most important and realistic use cases for XOR QA. Our experiments demonstrate the poor performance of automatic translation and multilingual retrieval methods. Overall, AfriQA proves challenging for state-of-the-art QA models. We hope that the dataset enables the development of more equitable QA technology. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2305.06897v1-abstract-full').style.display = 'none'; document.getElementById('2305.06897v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 11 May, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> May 2023. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2305.04675">arXiv:2305.04675</a> <span> [<a href="https://arxiv.org/pdf/2305.04675">pdf</a>, <a href="https://arxiv.org/format/2305.04675">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Nuclear Theory">nucl-th</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> Predicting nuclear masses with product-unit networks </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Dellen%2C+B">Babette Dellen</a>, <a href="/search/cs?searchtype=author&query=Jaekel%2C+U">Uwe Jaekel</a>, <a href="/search/cs?searchtype=author&query=Freitas%2C+P+S+A">Paulo S. A. Freitas</a>, <a href="/search/cs?searchtype=author&query=Clark%2C+J+W">John W. 
Clark</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2305.04675v1-abstract-short" style="display: inline;"> Accurate estimation of nuclear masses and their prediction beyond the experimentally explored domains of the nuclear landscape are crucial to an understanding of the fundamental origin of nuclear properties and to many applications of nuclear science, most notably in quantifying the $r$-process of stellar nucleosynthesis. Neural networks have been applied with some success to the prediction of nuc… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2305.04675v1-abstract-full').style.display = 'inline'; document.getElementById('2305.04675v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2305.04675v1-abstract-full" style="display: none;"> Accurate estimation of nuclear masses and their prediction beyond the experimentally explored domains of the nuclear landscape are crucial to an understanding of the fundamental origin of nuclear properties and to many applications of nuclear science, most notably in quantifying the $r$-process of stellar nucleosynthesis. Neural networks have been applied with some success to the prediction of nuclear masses, but they are known to have shortcomings in application to extrapolation tasks. In this work, we propose and explore a novel type of neural network for mass prediction in which the usual neuron-like processing units are replaced by complex-valued product units that permit multiplicative couplings of inputs to be learned from the input data. This generalized network model is tested on both interpolation and extrapolation data sets drawn from the Atomic Mass Evaluation. 
Its performance is compared with that of several neural-network architectures, substantiating its suitability for nuclear mass prediction. Additionally, a prediction-uncertainty measure for such complex-valued networks is proposed that serves to identify regions of expected low prediction error. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2305.04675v1-abstract-full').style.display = 'none'; document.getElementById('2305.04675v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 8 May, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> May 2023. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2304.04914">arXiv:2304.04914</a> <span> [<a href="https://arxiv.org/pdf/2304.04914">pdf</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="General Economics">econ.GN</span> </div> </div> <p class="title is-5 mathjax"> Regulatory Markets: The Future of AI Governance </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Hadfield%2C+G+K">Gillian K. Hadfield</a>, <a href="/search/cs?searchtype=author&query=Clark%2C+J">Jack Clark</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2304.04914v4-abstract-short" style="display: inline;"> Appropriately regulating artificial intelligence is an increasingly urgent policy challenge. Legislatures and regulators lack the specialized knowledge required to best translate public demands into legal requirements. 
Overreliance on industry self-regulation fails to hold producers and users of AI systems accountable to democratic demands. Regulatory markets, in which governments require the targ… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2304.04914v4-abstract-full').style.display = 'inline'; document.getElementById('2304.04914v4-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2304.04914v4-abstract-full" style="display: none;"> Appropriately regulating artificial intelligence is an increasingly urgent policy challenge. Legislatures and regulators lack the specialized knowledge required to best translate public demands into legal requirements. Overreliance on industry self-regulation fails to hold producers and users of AI systems accountable to democratic demands. Regulatory markets, in which governments require the targets of regulation to purchase regulatory services from a private regulator, are proposed. This approach to AI regulation could overcome the limitations of both command-and-control regulation and self-regulation. Regulatory market could enable governments to establish policy priorities for the regulation of AI, whilst relying on market forces and industry R&D efforts to pioneer the methods of regulation that best achieve policymakers' stated objectives. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2304.04914v4-abstract-full').style.display = 'none'; document.getElementById('2304.04914v4-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 25 April, 2023; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 10 April, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> April 2023. 
</p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2303.13204">arXiv:2303.13204</a> <span> [<a href="https://arxiv.org/pdf/2303.13204">pdf</a>, <a href="https://arxiv.org/format/2303.13204">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Cryptography and Security">cs.CR</span> </div> </div> <p class="title is-5 mathjax"> A Privacy-Preserving Energy Theft Detection Model for Effective Demand-Response Management in Smart Grids </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Alromih%2C+A">Arwa Alromih</a>, <a href="/search/cs?searchtype=author&query=Clark%2C+J+A">John A. Clark</a>, <a href="/search/cs?searchtype=author&query=Gope%2C+P">Prosanta Gope</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2303.13204v1-abstract-short" style="display: inline;"> The detection of energy thefts is vital for the safety of the whole smart grid system. However, the detection alone is not enough since energy thefts can crucially affect the electricity supply leading to some blackouts. Moreover, privacy is one of the major challenges that must be preserved when dealing with clients' energy data. This is often overlooked in energy theft detection research as most… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2303.13204v1-abstract-full').style.display = 'inline'; document.getElementById('2303.13204v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2303.13204v1-abstract-full" style="display: none;"> The detection of energy thefts is vital for the safety of the whole smart grid system. 
However, the detection alone is not enough since energy thefts can crucially affect the electricity supply leading to some blackouts. Moreover, privacy is one of the major challenges that must be preserved when dealing with clients' energy data. This is often overlooked in energy theft detection research as most current detection techniques rely on raw, unencrypted data, which may potentially expose sensitive and personal data. To solve this issue, we present a privacy-preserving energy theft detection technique with effective demand management that employs two layers of privacy protection. We explore a split learning mechanism that trains a detection model in a decentralised fashion without the need to exchange raw data. We also employ a second layer of privacy by the use of a masking scheme to mask clients' outputs in order to prevent inference attacks. A privacy-enhanced version of this mechanism also employs an additional layer of privacy protection by training a randomisation layer at the end of the client-side model. This is done to make the output as random as possible without compromising the detection performance. For the energy theft detection part, we design a multi-output machine learning model to identify energy thefts, estimate their volume, and effectively predict future demand. Finally, we use a comprehensive set of experiments to test our proposed scheme. The experimental results show that our scheme achieves high detection accuracy and greatly improves the privacy preservation degree. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2303.13204v1-abstract-full').style.display = 'none'; document.getElementById('2303.13204v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 23 March, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> March 2023. 
</p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2302.07459">arXiv:2302.07459</a> <span> [<a href="https://arxiv.org/pdf/2302.07459">pdf</a>, <a href="https://arxiv.org/format/2302.07459">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> </div> </div> <p class="title is-5 mathjax"> The Capacity for Moral Self-Correction in Large Language Models </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Ganguli%2C+D">Deep Ganguli</a>, <a href="/search/cs?searchtype=author&query=Askell%2C+A">Amanda Askell</a>, <a href="/search/cs?searchtype=author&query=Schiefer%2C+N">Nicholas Schiefer</a>, <a href="/search/cs?searchtype=author&query=Liao%2C+T+I">Thomas I. Liao</a>, <a href="/search/cs?searchtype=author&query=Luko%C5%A1i%C5%ABt%C4%97%2C+K">Kamilė Lukošiūtė</a>, <a href="/search/cs?searchtype=author&query=Chen%2C+A">Anna Chen</a>, <a href="/search/cs?searchtype=author&query=Goldie%2C+A">Anna Goldie</a>, <a href="/search/cs?searchtype=author&query=Mirhoseini%2C+A">Azalia Mirhoseini</a>, <a href="/search/cs?searchtype=author&query=Olsson%2C+C">Catherine Olsson</a>, <a href="/search/cs?searchtype=author&query=Hernandez%2C+D">Danny Hernandez</a>, <a href="/search/cs?searchtype=author&query=Drain%2C+D">Dawn Drain</a>, <a href="/search/cs?searchtype=author&query=Li%2C+D">Dustin Li</a>, <a href="/search/cs?searchtype=author&query=Tran-Johnson%2C+E">Eli Tran-Johnson</a>, <a href="/search/cs?searchtype=author&query=Perez%2C+E">Ethan Perez</a>, <a href="/search/cs?searchtype=author&query=Kernion%2C+J">Jackson Kernion</a>, <a href="/search/cs?searchtype=author&query=Kerr%2C+J">Jamie Kerr</a>, <a href="/search/cs?searchtype=author&query=Mueller%2C+J">Jared Mueller</a>, <a 
href="/search/cs?searchtype=author&query=Landau%2C+J">Joshua Landau</a>, <a href="/search/cs?searchtype=author&query=Ndousse%2C+K">Kamal Ndousse</a>, <a href="/search/cs?searchtype=author&query=Nguyen%2C+K">Karina Nguyen</a>, <a href="/search/cs?searchtype=author&query=Lovitt%2C+L">Liane Lovitt</a>, <a href="/search/cs?searchtype=author&query=Sellitto%2C+M">Michael Sellitto</a>, <a href="/search/cs?searchtype=author&query=Elhage%2C+N">Nelson Elhage</a>, <a href="/search/cs?searchtype=author&query=Mercado%2C+N">Noemi Mercado</a>, <a href="/search/cs?searchtype=author&query=DasSarma%2C+N">Nova DasSarma</a> , et al. (24 additional authors not shown) </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2302.07459v2-abstract-short" style="display: inline;"> We test the hypothesis that language models trained with reinforcement learning from human feedback (RLHF) have the capability to "morally self-correct" -- to avoid producing harmful outputs -- if instructed to do so. We find strong evidence in support of this hypothesis across three different experiments, each of which reveal different facets of moral self-correction. We find that the capability… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2302.07459v2-abstract-full').style.display = 'inline'; document.getElementById('2302.07459v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2302.07459v2-abstract-full" style="display: none;"> We test the hypothesis that language models trained with reinforcement learning from human feedback (RLHF) have the capability to "morally self-correct" -- to avoid producing harmful outputs -- if instructed to do so. 
We find strong evidence in support of this hypothesis across three different experiments, each of which reveal different facets of moral self-correction. We find that the capability for moral self-correction emerges at 22B model parameters, and typically improves with increasing model size and RLHF training. We believe that at this level of scale, language models obtain two capabilities that they can use for moral self-correction: (1) they can follow instructions and (2) they can learn complex normative concepts of harm like stereotyping, bias, and discrimination. As such, they can follow instructions to avoid certain kinds of morally harmful outputs. We believe our results are cause for cautious optimism regarding the ability to train language models to abide by ethical principles. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2302.07459v2-abstract-full').style.display = 'none'; document.getElementById('2302.07459v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 18 February, 2023; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 14 February, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2023. 
</p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2301.08019">arXiv:2301.08019</a> <span> [<a href="https://arxiv.org/pdf/2301.08019">pdf</a>, <a href="https://arxiv.org/format/2301.08019">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> Identification, explanation and clinical evaluation of hospital patient subtypes </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Werner%2C+E">Enrico Werner</a>, <a href="/search/cs?searchtype=author&query=Clark%2C+J+N">Jeffrey N. Clark</a>, <a href="/search/cs?searchtype=author&query=Bhamber%2C+R+S">Ranjeet S. Bhamber</a>, <a href="/search/cs?searchtype=author&query=Ambler%2C+M">Michael Ambler</a>, <a href="/search/cs?searchtype=author&query=Bourdeaux%2C+C+P">Christopher P. Bourdeaux</a>, <a href="/search/cs?searchtype=author&query=Hepburn%2C+A">Alexander Hepburn</a>, <a href="/search/cs?searchtype=author&query=McWilliams%2C+C+J">Christopher J. McWilliams</a>, <a href="/search/cs?searchtype=author&query=Santos-Rodriguez%2C+R">Raul Santos-Rodriguez</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2301.08019v1-abstract-short" style="display: inline;"> We present a pipeline in which unsupervised machine learning techniques are used to automatically identify subtypes of hospital patients admitted between 2017 and 2021 in a large UK teaching hospital. With the use of state-of-the-art explainability techniques, the identified subtypes are interpreted and assigned clinical meaning. 
In parallel, clinicians assessed intra-cluster similarities and inte… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2301.08019v1-abstract-full').style.display = 'inline'; document.getElementById('2301.08019v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2301.08019v1-abstract-full" style="display: none;"> We present a pipeline in which unsupervised machine learning techniques are used to automatically identify subtypes of hospital patients admitted between 2017 and 2021 in a large UK teaching hospital. With the use of state-of-the-art explainability techniques, the identified subtypes are interpreted and assigned clinical meaning. In parallel, clinicians assessed intra-cluster similarities and inter-cluster differences of the identified patient subtypes within the context of their clinical knowledge. By confronting the outputs of both automatic and clinician-based explanations, we aim to highlight the mutual benefit of combining machine learning techniques with clinical expertise. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2301.08019v1-abstract-full').style.display = 'none'; document.getElementById('2301.08019v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 19 January, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> January 2023. 
</p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2212.12965">arXiv:2212.12965</a> <span> [<a href="https://arxiv.org/pdf/2212.12965">pdf</a>, <a href="https://arxiv.org/format/2212.12965">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> BD-KD: Balancing the Divergences for Online Knowledge Distillation </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Amara%2C+I">Ibtihel Amara</a>, <a href="/search/cs?searchtype=author&query=Sepahvand%2C+N">Nazanin Sepahvand</a>, <a href="/search/cs?searchtype=author&query=Meyer%2C+B+H">Brett H. Meyer</a>, <a href="/search/cs?searchtype=author&query=Gross%2C+W+J">Warren J. Gross</a>, <a href="/search/cs?searchtype=author&query=Clark%2C+J+J">James J. Clark</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2212.12965v1-abstract-short" style="display: inline;"> Knowledge distillation (KD) has gained a lot of attention in the field of model compression for edge devices thanks to its effectiveness in compressing large powerful networks into smaller lower-capacity models. 
Online distillation, in which both the teacher and the student are learning collaboratively, has also gained much interest due to its ability to improve on the performance of the networks… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2212.12965v1-abstract-full').style.display = 'inline'; document.getElementById('2212.12965v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2212.12965v1-abstract-full" style="display: none;"> Knowledge distillation (KD) has gained a lot of attention in the field of model compression for edge devices thanks to its effectiveness in compressing large powerful networks into smaller lower-capacity models. Online distillation, in which both the teacher and the student are learning collaboratively, has also gained much interest due to its ability to improve on the performance of the networks involved. The Kullback-Leibler (KL) divergence ensures the proper knowledge transfer between the teacher and student. However, most online KD techniques present some bottlenecks under the network capacity gap. By cooperatively and simultaneously training, the models the KL distance becomes incapable of properly minimizing the teacher's and student's distributions. Alongside accuracy, critical edge device applications are in need of well-calibrated compact networks. Confidence calibration provides a sensible way of getting trustworthy predictions. We propose BD-KD: Balancing of Divergences for online Knowledge Distillation. We show that adaptively balancing between the reverse and forward divergences shifts the focus of the training strategy to the compact student network without limiting the teacher network's learning process. We demonstrate that, by performing this balancing design at the level of the student distillation loss, we improve upon both performance accuracy and calibration of the compact student network. 
We conducted extensive experiments using a variety of network architectures and show improvements on multiple datasets including CIFAR-10, CIFAR-100, Tiny-ImageNet, and ImageNet. We illustrate the effectiveness of our approach through comprehensive comparisons and ablations with current state-of-the-art online and offline KD techniques. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2212.12965v1-abstract-full').style.display = 'none'; document.getElementById('2212.12965v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 25 December, 2022; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> December 2022. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2212.10726">arXiv:2212.10726</a> <span> [<a href="https://arxiv.org/pdf/2212.10726">pdf</a>, <a href="https://arxiv.org/format/2212.10726">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> Beyond Contrastive Learning: A Variational Generative Model for Multilingual Retrieval </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Wieting%2C+J">John Wieting</a>, <a href="/search/cs?searchtype=author&query=Clark%2C+J+H">Jonathan H. Clark</a>, <a href="/search/cs?searchtype=author&query=Cohen%2C+W+W">William W. 
Cohen</a>, <a href="/search/cs?searchtype=author&query=Neubig%2C+G">Graham Neubig</a>, <a href="/search/cs?searchtype=author&query=Berg-Kirkpatrick%2C+T">Taylor Berg-Kirkpatrick</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2212.10726v2-abstract-short" style="display: inline;"> Contrastive learning has been successfully used for retrieval of semantically aligned sentences, but it often requires large batch sizes or careful engineering to work well. In this paper, we instead propose a generative model for learning multilingual text embeddings which can be used to retrieve or score sentence pairs. Our model operates on parallel data in $N$ languages and, through an approxi… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2212.10726v2-abstract-full').style.display = 'inline'; document.getElementById('2212.10726v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2212.10726v2-abstract-full" style="display: none;"> Contrastive learning has been successfully used for retrieval of semantically aligned sentences, but it often requires large batch sizes or careful engineering to work well. In this paper, we instead propose a generative model for learning multilingual text embeddings which can be used to retrieve or score sentence pairs. Our model operates on parallel data in $N$ languages and, through an approximation we introduce, efficiently encourages source separation in this multilingual setting, separating semantic information that is shared between translations from stylistic or language-specific variation. 
We show careful large-scale comparisons between contrastive and generation-based approaches for learning multilingual text embeddings, a comparison that has not been done to the best of our knowledge despite the popularity of these approaches. We evaluate this method on a suite of tasks including semantic similarity, bitext mining, and cross-lingual question retrieval -- the last of which we introduce in this paper. Overall, our Variational Multilingual Source-Separation Transformer (VMSST) model outperforms both a strong contrastive and generative baseline on these tasks. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2212.10726v2-abstract-full').style.display = 'none'; document.getElementById('2212.10726v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 4 June, 2023; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 20 December, 2022; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> December 2022. 
</p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Published as a long paper at ACL 2023</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2212.10650">arXiv:2212.10650</a> <span> [<a href="https://arxiv.org/pdf/2212.10650">pdf</a>, <a href="https://arxiv.org/format/2212.10650">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> </div> </div> <p class="title is-5 mathjax"> KronA: Parameter Efficient Tuning with Kronecker Adapter </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Edalati%2C+A">Ali Edalati</a>, <a href="/search/cs?searchtype=author&query=Tahaei%2C+M">Marzieh Tahaei</a>, <a href="/search/cs?searchtype=author&query=Kobyzev%2C+I">Ivan Kobyzev</a>, <a href="/search/cs?searchtype=author&query=Nia%2C+V+P">Vahid Partovi Nia</a>, <a href="/search/cs?searchtype=author&query=Clark%2C+J+J">James J. Clark</a>, <a href="/search/cs?searchtype=author&query=Rezagholizadeh%2C+M">Mehdi Rezagholizadeh</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2212.10650v1-abstract-short" style="display: inline;"> Fine-tuning a Pre-trained Language Model (PLM) on a specific downstream task has been a well-known paradigm in Natural Language Processing. However, with the ever-growing size of PLMs, training the entire model on several downstream tasks becomes very expensive and resource-hungry. 
Recently, different Parameter Efficient Tuning (PET) techniques are proposed to improve the efficiency of fine-tuning… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2212.10650v1-abstract-full').style.display = 'inline'; document.getElementById('2212.10650v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2212.10650v1-abstract-full" style="display: none;"> Fine-tuning a Pre-trained Language Model (PLM) on a specific downstream task has been a well-known paradigm in Natural Language Processing. However, with the ever-growing size of PLMs, training the entire model on several downstream tasks becomes very expensive and resource-hungry. Recently, different Parameter Efficient Tuning (PET) techniques are proposed to improve the efficiency of fine-tuning PLMs. One popular category of PET methods is the low-rank adaptation methods which insert learnable truncated SVD modules into the original model either sequentially or in parallel. However, low-rank decomposition suffers from limited representation power. In this work, we address this problem using the Kronecker product instead of the low-rank representation. We introduce KronA, a Kronecker product-based adapter module for efficient fine-tuning of Transformer-based PLMs. We apply the proposed methods for fine-tuning T5 on the GLUE benchmark to show that incorporating the Kronecker-based modules can outperform state-of-the-art PET methods. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2212.10650v1-abstract-full').style.display = 'none'; document.getElementById('2212.10650v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 20 December, 2022; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> December 2022. 
</p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2212.09251">arXiv:2212.09251</a> <span> [<a href="https://arxiv.org/pdf/2212.09251">pdf</a>, <a href="https://arxiv.org/format/2212.09251">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> Discovering Language Model Behaviors with Model-Written Evaluations </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Perez%2C+E">Ethan Perez</a>, <a href="/search/cs?searchtype=author&query=Ringer%2C+S">Sam Ringer</a>, <a href="/search/cs?searchtype=author&query=Luko%C5%A1i%C5%ABt%C4%97%2C+K">Kamilė Lukošiūtė</a>, <a href="/search/cs?searchtype=author&query=Nguyen%2C+K">Karina Nguyen</a>, <a href="/search/cs?searchtype=author&query=Chen%2C+E">Edwin Chen</a>, <a href="/search/cs?searchtype=author&query=Heiner%2C+S">Scott Heiner</a>, <a href="/search/cs?searchtype=author&query=Pettit%2C+C">Craig Pettit</a>, <a href="/search/cs?searchtype=author&query=Olsson%2C+C">Catherine Olsson</a>, <a href="/search/cs?searchtype=author&query=Kundu%2C+S">Sandipan Kundu</a>, <a href="/search/cs?searchtype=author&query=Kadavath%2C+S">Saurav Kadavath</a>, <a href="/search/cs?searchtype=author&query=Jones%2C+A">Andy Jones</a>, <a href="/search/cs?searchtype=author&query=Chen%2C+A">Anna Chen</a>, <a href="/search/cs?searchtype=author&query=Mann%2C+B">Ben Mann</a>, <a href="/search/cs?searchtype=author&query=Israel%2C+B">Brian Israel</a>, <a href="/search/cs?searchtype=author&query=Seethor%2C+B">Bryan Seethor</a>, <a 
href="/search/cs?searchtype=author&query=McKinnon%2C+C">Cameron McKinnon</a>, <a href="/search/cs?searchtype=author&query=Olah%2C+C">Christopher Olah</a>, <a href="/search/cs?searchtype=author&query=Yan%2C+D">Da Yan</a>, <a href="/search/cs?searchtype=author&query=Amodei%2C+D">Daniela Amodei</a>, <a href="/search/cs?searchtype=author&query=Amodei%2C+D">Dario Amodei</a>, <a href="/search/cs?searchtype=author&query=Drain%2C+D">Dawn Drain</a>, <a href="/search/cs?searchtype=author&query=Li%2C+D">Dustin Li</a>, <a href="/search/cs?searchtype=author&query=Tran-Johnson%2C+E">Eli Tran-Johnson</a>, <a href="/search/cs?searchtype=author&query=Khundadze%2C+G">Guro Khundadze</a>, <a href="/search/cs?searchtype=author&query=Kernion%2C+J">Jackson Kernion</a> , et al. (38 additional authors not shown) </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2212.09251v1-abstract-short" style="display: inline;"> As language models (LMs) scale, they develop many novel behaviors, good and bad, exacerbating the need to evaluate how they behave. Prior work creates evaluations with crowdwork (which is time-consuming and expensive) or existing data sources (which are not always available). Here, we automatically generate evaluations with LMs. We explore approaches with varying amounts of human effort, from inst… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2212.09251v1-abstract-full').style.display = 'inline'; document.getElementById('2212.09251v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2212.09251v1-abstract-full" style="display: none;"> As language models (LMs) scale, they develop many novel behaviors, good and bad, exacerbating the need to evaluate how they behave. 
Prior work creates evaluations with crowdwork (which is time-consuming and expensive) or existing data sources (which are not always available). Here, we automatically generate evaluations with LMs. We explore approaches with varying amounts of human effort, from instructing LMs to write yes/no questions to making complex Winogender schemas with multiple stages of LM-based generation and filtering. Crowdworkers rate the examples as highly relevant and agree with 90-100% of labels, sometimes more so than corresponding human-written datasets. We generate 154 datasets and discover new cases of inverse scaling where LMs get worse with size. Larger LMs repeat back a dialog user's preferred answer ("sycophancy") and express greater desire to pursue concerning goals like resource acquisition and goal preservation. We also find some of the first examples of inverse scaling in RL from Human Feedback (RLHF), where more RLHF makes LMs worse. For example, RLHF makes LMs express stronger political views (on gun rights and immigration) and a greater desire to avoid shut down. Overall, LM-written evaluations are high-quality and let us quickly discover many novel LM behaviors. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2212.09251v1-abstract-full').style.display = 'none'; document.getElementById('2212.09251v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 19 December, 2022; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> December 2022. 
</p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">for associated data visualizations, see https://www.evals.anthropic.com/model-written/ for full datasets, see https://github.com/anthropics/evals</span> </p> </li> </ol> <nav class="pagination is-small is-centered breathe-horizontal" role="navigation" aria-label="pagination"> <a href="" class="pagination-previous is-invisible">Previous </a> <a href="/search/?searchtype=author&query=Clark%2C+J&start=50" class="pagination-next" >Next </a> <ul class="pagination-list"> <li> <a href="/search/?searchtype=author&query=Clark%2C+J&start=0" class="pagination-link is-current" aria-label="Goto page 1" aria-current="page">1 </a> </li> <li> <a href="/search/?searchtype=author&query=Clark%2C+J&start=50" class="pagination-link " aria-label="Page 2">2 </a> </li> <li> <a href="/search/?searchtype=author&query=Clark%2C+J&start=100" class="pagination-link " aria-label="Page 3">3 </a> </li> </ul> </nav> <div class="is-hidden-tablet"> <!-- feedback for mobile only --> <span class="help" style="display: inline-block;"><a href="https://github.com/arXiv/arxiv-search/releases">Search v0.5.6 released 2020-02-24</a> </span> </div> </div> </main> <footer> <div class="columns is-desktop" role="navigation" aria-label="Secondary"> <!-- MetaColumn 1 --> <div class="column"> <div class="columns"> <div class="column"> <ul class="nav-spaced"> <li><a href="https://info.arxiv.org/about">About</a></li> <li><a href="https://info.arxiv.org/help">Help</a></li> </ul> </div> <div class="column"> <ul class="nav-spaced"> <li> <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 512 512" class="icon filter-black" role="presentation"><title>contact arXiv</title><desc>Click here to contact arXiv</desc><path d="M502.3 190.8c3.9-3.1 9.7-.2 9.7 4.7V400c0 26.5-21.5 48-48 48H48c-26.5 0-48-21.5-48-48V195.6c0-5 5.7-7.8 9.7-4.7 22.4 17.4 52.1 39.5 
154.1 113.6 21.1 15.4 56.7 47.8 92.2 47.6 35.7.3 72-32.8 92.3-47.6 102-74.1 131.6-96.3 154-113.7zM256 320c23.2.4 56.6-29.2 73.4-41.4 132.7-96.3 142.8-104.7 173.4-128.7 5.8-4.5 9.2-11.5 9.2-18.9v-19c0-26.5-21.5-48-48-48H48C21.5 64 0 85.5 0 112v19c0 7.4 3.4 14.3 9.2 18.9 30.6 23.9 40.7 32.4 173.4 128.7 16.8 12.2 50.2 41.8 73.4 41.4z"/></svg> <a href="https://info.arxiv.org/help/contact.html"> Contact</a> </li> <li> <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 512 512" class="icon filter-black" role="presentation"><title>subscribe to arXiv mailings</title><desc>Click here to subscribe</desc><path d="M476 3.2L12.5 270.6c-18.1 10.4-15.8 35.6 2.2 43.2L121 358.4l287.3-253.2c5.5-4.9 13.3 2.6 8.6 8.3L176 407v80.5c0 23.6 28.5 32.9 42.5 15.8L282 426l124.6 52.2c14.2 6 30.4-2.9 33-18.2l72-432C515 7.8 493.3-6.8 476 3.2z"/></svg> <a href="https://info.arxiv.org/help/subscribe"> Subscribe</a> </li> </ul> </div> </div> </div> <!-- end MetaColumn 1 --> <!-- MetaColumn 2 --> <div class="column"> <div class="columns"> <div class="column"> <ul class="nav-spaced"> <li><a href="https://info.arxiv.org/help/license/index.html">Copyright</a></li> <li><a href="https://info.arxiv.org/help/policies/privacy_policy.html">Privacy Policy</a></li> </ul> </div> <div class="column sorry-app-links"> <ul class="nav-spaced"> <li><a href="https://info.arxiv.org/help/web_accessibility.html">Web Accessibility Assistance</a></li> <li> <p class="help"> <a class="a11y-main-link" href="https://status.arxiv.org" target="_blank">arXiv Operational Status <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 256 512" class="icon filter-dark_grey" role="presentation"><path d="M224.3 273l-136 136c-9.4 9.4-24.6 9.4-33.9 0l-22.6-22.6c-9.4-9.4-9.4-24.6 0-33.9l96.4-96.4-96.4-96.4c-9.4-9.4-9.4-24.6 0-33.9L54.3 103c9.4-9.4 24.6-9.4 33.9 0l136 136c9.5 9.4 9.5 24.6.1 34z"/></svg></a><br> Get status notifications via <a class="is-link" href="https://subscribe.sorryapp.com/24846f03/email/new" target="_blank"><svg 
xmlns="http://www.w3.org/2000/svg" viewBox="0 0 512 512" class="icon filter-black" role="presentation"><path d="M502.3 190.8c3.9-3.1 9.7-.2 9.7 4.7V400c0 26.5-21.5 48-48 48H48c-26.5 0-48-21.5-48-48V195.6c0-5 5.7-7.8 9.7-4.7 22.4 17.4 52.1 39.5 154.1 113.6 21.1 15.4 56.7 47.8 92.2 47.6 35.7.3 72-32.8 92.3-47.6 102-74.1 131.6-96.3 154-113.7zM256 320c23.2.4 56.6-29.2 73.4-41.4 132.7-96.3 142.8-104.7 173.4-128.7 5.8-4.5 9.2-11.5 9.2-18.9v-19c0-26.5-21.5-48-48-48H48C21.5 64 0 85.5 0 112v19c0 7.4 3.4 14.3 9.2 18.9 30.6 23.9 40.7 32.4 173.4 128.7 16.8 12.2 50.2 41.8 73.4 41.4z"/></svg>email</a> or <a class="is-link" href="https://subscribe.sorryapp.com/24846f03/slack/new" target="_blank"><svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 448 512" class="icon filter-black" role="presentation"><path d="M94.12 315.1c0 25.9-21.16 47.06-47.06 47.06S0 341 0 315.1c0-25.9 21.16-47.06 47.06-47.06h47.06v47.06zm23.72 0c0-25.9 21.16-47.06 47.06-47.06s47.06 21.16 47.06 47.06v117.84c0 25.9-21.16 47.06-47.06 47.06s-47.06-21.16-47.06-47.06V315.1zm47.06-188.98c-25.9 0-47.06-21.16-47.06-47.06S139 32 164.9 32s47.06 21.16 47.06 47.06v47.06H164.9zm0 23.72c25.9 0 47.06 21.16 47.06 47.06s-21.16 47.06-47.06 47.06H47.06C21.16 243.96 0 222.8 0 196.9s21.16-47.06 47.06-47.06H164.9zm188.98 47.06c0-25.9 21.16-47.06 47.06-47.06 25.9 0 47.06 21.16 47.06 47.06s-21.16 47.06-47.06 47.06h-47.06V196.9zm-23.72 0c0 25.9-21.16 47.06-47.06 47.06-25.9 0-47.06-21.16-47.06-47.06V79.06c0-25.9 21.16-47.06 47.06-47.06 25.9 0 47.06 21.16 47.06 47.06V196.9zM283.1 385.88c25.9 0 47.06 21.16 47.06 47.06 0 25.9-21.16 47.06-47.06 47.06-25.9 0-47.06-21.16-47.06-47.06v-47.06h47.06zm0-23.72c-25.9 0-47.06-21.16-47.06-47.06 0-25.9 21.16-47.06 47.06-47.06h117.84c25.9 0 47.06 21.16 47.06 47.06 0 25.9-21.16 47.06-47.06 47.06H283.1z"/></svg>slack</a> </p> </li> </ul> </div> </div> </div> <!-- end MetaColumn 2 --> </div> </footer> <script src="https://static.arxiv.org/static/base/1.0.0a5/js/member_acknowledgement.js"></script> 
</body> </html>