CINXE.COM
Search | arXiv e-print repository
<!DOCTYPE html> <html lang="en"> <head> <meta charset="utf-8"/> <meta name="viewport" content="width=device-width, initial-scale=1"/> <!-- new favicon config and versions by realfavicongenerator.net --> <link rel="apple-touch-icon" sizes="180x180" href="https://static.arxiv.org/static/base/1.0.0a5/images/icons/apple-touch-icon.png"> <link rel="icon" type="image/png" sizes="32x32" href="https://static.arxiv.org/static/base/1.0.0a5/images/icons/favicon-32x32.png"> <link rel="icon" type="image/png" sizes="16x16" href="https://static.arxiv.org/static/base/1.0.0a5/images/icons/favicon-16x16.png"> <link rel="manifest" href="https://static.arxiv.org/static/base/1.0.0a5/images/icons/site.webmanifest"> <link rel="mask-icon" href="https://static.arxiv.org/static/base/1.0.0a5/images/icons/safari-pinned-tab.svg" color="#b31b1b"> <link rel="shortcut icon" href="https://static.arxiv.org/static/base/1.0.0a5/images/icons/favicon.ico"> <meta name="msapplication-TileColor" content="#b31b1b"> <meta name="msapplication-config" content="images/icons/browserconfig.xml"> <meta name="theme-color" content="#b31b1b"> <!-- end favicon config --> <title>Search | arXiv e-print repository</title> <script defer src="https://static.arxiv.org/static/base/1.0.0a5/fontawesome-free-5.11.2-web/js/all.js"></script> <link rel="stylesheet" href="https://static.arxiv.org/static/base/1.0.0a5/css/arxivstyle.css" /> <script type="text/x-mathjax-config"> MathJax.Hub.Config({ messageStyle: "none", extensions: ["tex2jax.js"], jax: ["input/TeX", "output/HTML-CSS"], tex2jax: { inlineMath: [ ['$','$'], ["\\(","\\)"] ], displayMath: [ ['$$','$$'], ["\\[","\\]"] ], processEscapes: true, ignoreClass: '.*', processClass: 'mathjax.*' }, TeX: { extensions: ["AMSmath.js", "AMSsymbols.js", "noErrors.js"], noErrors: { inlineDelimiters: ["$","$"], multiLine: false, style: { "font-size": "normal", "border": "" } } }, "HTML-CSS": { availableFonts: ["TeX"] } }); </script> <script src='//static.arxiv.org/MathJax-2.7.3/MathJax.js'></script> <script src="https://static.arxiv.org/static/base/1.0.0a5/js/notification.js"></script> <link rel="stylesheet" href="https://static.arxiv.org/static/search/0.5.6/css/bulma-tooltip.min.css" /> <link rel="stylesheet" href="https://static.arxiv.org/static/search/0.5.6/css/search.css" /> <script src="https://code.jquery.com/jquery-3.2.1.slim.min.js" integrity="sha256-k2WSCIexGzOj3Euiig+TlR8gA0EmPjuc79OEeY5L45g=" crossorigin="anonymous"></script> <script src="https://static.arxiv.org/static/search/0.5.6/js/fieldset.js"></script> <style> radio#cf-customfield_11400 { display: none; } </style> </head> <body> <header><a href="#main-container" class="is-sr-only">Skip to main content</a> <!-- contains Cornell logo and sponsor statement --> <div class="attribution level is-marginless" role="banner"> <div class="level-left"> <a class="level-item" href="https://cornell.edu/"><img src="https://static.arxiv.org/static/base/1.0.0a5/images/cornell-reduced-white-SMALL.svg" alt="Cornell University" width="200" aria-label="logo" /></a> </div> <div class="level-right is-marginless"><p class="sponsors level-item is-marginless"><span id="support-ack-url">We gratefully acknowledge support from<br /> the Simons Foundation, <a href="https://info.arxiv.org/about/ourmembers.html">member institutions</a>, and all contributors. <a href="https://info.arxiv.org/about/donate.html">Donate</a></span></p></div> </div> <!-- contains arXiv identity and search bar --> <div class="identity level is-marginless"> <div class="level-left"> <div class="level-item"> <a class="arxiv" href="https://arxiv.org/" aria-label="arxiv-logo"> <img src="https://static.arxiv.org/static/base/1.0.0a5/images/arxiv-logo-one-color-white.svg" aria-label="logo" alt="arxiv logo" width="85" style="width:85px;"/> </a> </div> </div> <div class="search-block level-right"> <form class="level-item mini-search" method="GET" action="https://arxiv.org/search"> <div class="field has-addons"> <div class="control"> <input class="input is-small" type="text" name="query" placeholder="Search..." aria-label="Search term or terms" /> <p class="help"><a href="https://info.arxiv.org/help">Help</a> | <a href="https://arxiv.org/search/advanced">Advanced Search</a></p> </div> <div class="control"> <div class="select is-small"> <select name="searchtype" aria-label="Field to search"> <option value="all" selected="selected">All fields</option> <option value="title">Title</option> <option value="author">Author</option> <option value="abstract">Abstract</option> <option value="comments">Comments</option> <option value="journal_ref">Journal reference</option> <option value="acm_class">ACM classification</option> <option value="msc_class">MSC classification</option> <option value="report_num">Report number</option> <option value="paper_id">arXiv identifier</option> <option value="doi">DOI</option> <option value="orcid">ORCID</option> <option value="author_id">arXiv author ID</option> <option value="help">Help pages</option> <option value="full_text">Full text</option> </select> </div> </div> <input type="hidden" name="source" value="header"> <button class="button is-small is-cul-darker">Search</button> </div> </form> </div> </div> <!-- closes identity --> <div class="container"> <div class="user-tools is-size-7 has-text-right has-text-weight-bold" role="navigation" aria-label="User menu"> <a href="https://arxiv.org/login">Login</a> </div> </div> </header> <main class="container" id="main-container"> <div class="level is-marginless"> <div class="level-left"> <h1 class="title is-clearfix"> Showing 1–34 of 34 results for author: <span class="mathjax">Zou, A</span> </h1> </div> <div class="level-right is-hidden-mobile"> <!-- feedback for mobile is moved to footer --> <span class="help" style="display: inline-block;"><a href="https://github.com/arXiv/arxiv-search/releases">Search v0.5.6 released 2020-02-24</a> </span> </div> </div> <div class="content"> <form method="GET" action="/search/cs" aria-role="search"> Searching in archive <strong>cs</strong>. <a href="/search/?searchtype=author&query=Zou%2C+A">Search in all archives.</a> <div class="field has-addons-tablet"> <div class="control is-expanded"> <label for="query" class="hidden-label">Search term or terms</label> <input class="input is-medium" id="query" name="query" placeholder="Search term..." type="text" value="Zou, A"> </div> <div class="select control is-medium"> <label class="is-hidden" for="searchtype">Field</label> <select class="is-medium" id="searchtype" name="searchtype"><option value="all">All fields</option><option value="title">Title</option><option selected value="author">Author(s)</option><option value="abstract">Abstract</option><option value="comments">Comments</option><option value="journal_ref">Journal reference</option><option value="acm_class">ACM classification</option><option value="msc_class">MSC classification</option><option value="report_num">Report number</option><option value="paper_id">arXiv identifier</option><option value="doi">DOI</option><option value="orcid">ORCID</option><option value="license">License (URI)</option><option value="author_id">arXiv author ID</option><option value="help">Help pages</option><option value="full_text">Full text</option></select> </div> <div class="control"> <button class="button is-link is-medium">Search</button> </div> </div> <div class="field"> <div class="control is-size-7"> <label class="radio"> <input checked id="abstracts-0" name="abstracts" type="radio" value="show"> Show abstracts </label> <label class="radio"> <input id="abstracts-1" name="abstracts" type="radio" value="hide"> Hide abstracts </label> </div> </div> <div class="is-clearfix" style="height: 2.5em"> <div class="is-pulled-right"> <a href="/search/advanced?terms-0-term=Zou%2C+A&terms-0-field=author&size=50&order=-announced_date_first">Advanced Search</a> </div> </div> <input type="hidden" name="order" value="-announced_date_first"> <input type="hidden" name="size" value="50"> </form> <div class="level breathe-horizontal"> <div class="level-left"> <form method="GET" action="/search/"> <div style="display: none;"> <select id="searchtype" name="searchtype"><option value="all">All fields</option><option value="title">Title</option><option selected value="author">Author(s)</option><option value="abstract">Abstract</option><option value="comments">Comments</option><option value="journal_ref">Journal reference</option><option value="acm_class">ACM classification</option><option value="msc_class">MSC classification</option><option value="report_num">Report number</option><option value="paper_id">arXiv identifier</option><option value="doi">DOI</option><option value="orcid">ORCID</option><option value="license">License (URI)</option><option value="author_id">arXiv author ID</option><option value="help">Help pages</option><option value="full_text">Full text</option></select> <input id="query" name="query" type="text" value="Zou, A"> <ul id="abstracts"><li><input checked id="abstracts-0" name="abstracts" type="radio" value="show"> <label for="abstracts-0">Show abstracts</label></li><li><input id="abstracts-1" name="abstracts" type="radio" value="hide"> <label for="abstracts-1">Hide abstracts</label></li></ul> </div> <div class="box field is-grouped is-grouped-multiline level-item"> <div class="control"> <span class="select is-small"> <select id="size" name="size"><option value="25">25</option><option selected value="50">50</option><option value="100">100</option><option value="200">200</option></select> </span> <label for="size">results per page</label>. </div> <div class="control"> <label for="order">Sort results by</label> <span class="select is-small"> <select id="order" name="order"><option selected value="-announced_date_first">Announcement date (newest first)</option><option value="announced_date_first">Announcement date (oldest first)</option><option value="-submitted_date">Submission date (newest first)</option><option value="submitted_date">Submission date (oldest first)</option><option value="">Relevance</option></select> </span> </div> <div class="control"> <button class="button is-small is-link">Go</button> </div> </div> </form> </div> </div> <ol class="breathe-horizontal" start="1"> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2502.14296">arXiv:2502.14296</a> <span> [<a href="https://arxiv.org/pdf/2502.14296">pdf</a>, <a href="https://arxiv.org/format/2502.14296">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computers and Society">cs.CY</span> </div> </div> <p class="title is-5 mathjax"> On the Trustworthiness of Generative Foundation Models: Guideline, Assessment, and Perspective </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Huang%2C+Y">Yue Huang</a>, <a href="/search/cs?searchtype=author&query=Gao%2C+C">Chujie Gao</a>, <a href="/search/cs?searchtype=author&query=Wu%2C+S">Siyuan Wu</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+H">Haoran Wang</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+X">Xiangqi Wang</a>, <a href="/search/cs?searchtype=author&query=Zhou%2C+Y">Yujun Zhou</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+Y">Yanbo Wang</a>, <a href="/search/cs?searchtype=author&query=Ye%2C+J">Jiayi Ye</a>, <a href="/search/cs?searchtype=author&query=Shi%2C+J">Jiawen Shi</a>, <a href="/search/cs?searchtype=author&query=Zhang%2C+Q">Qihui Zhang</a>, <a href="/search/cs?searchtype=author&query=Li%2C+Y">Yuan Li</a>, <a href="/search/cs?searchtype=author&query=Bao%2C+H">Han Bao</a>, <a href="/search/cs?searchtype=author&query=Liu%2C+Z">Zhaoyi Liu</a>, <a href="/search/cs?searchtype=author&query=Guan%2C+T">Tianrui Guan</a>, <a href="/search/cs?searchtype=author&query=Chen%2C+D">Dongping Chen</a>, <a href="/search/cs?searchtype=author&query=Chen%2C+R">Ruoxi Chen</a>, <a href="/search/cs?searchtype=author&query=Guo%2C+K">Kehan Guo</a>, <a href="/search/cs?searchtype=author&query=Zou%2C+A">Andy Zou</a>, <a href="/search/cs?searchtype=author&query=Kuen-Yew%2C+B+H">Bryan Hooi Kuen-Yew</a>, <a href="/search/cs?searchtype=author&query=Xiong%2C+C">Caiming Xiong</a>, <a href="/search/cs?searchtype=author&query=Stengel-Eskin%2C+E">Elias Stengel-Eskin</a>, <a href="/search/cs?searchtype=author&query=Zhang%2C+H">Hongyang Zhang</a>, <a href="/search/cs?searchtype=author&query=Yin%2C+H">Hongzhi Yin</a>, <a href="/search/cs?searchtype=author&query=Zhang%2C+H">Huan Zhang</a>, <a href="/search/cs?searchtype=author&query=Yao%2C+H">Huaxiu Yao</a> , et al. (41 additional authors not shown) </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2502.14296v1-abstract-short" style="display: inline;"> Generative Foundation Models (GenFMs) have emerged as transformative tools. However, their widespread adoption raises critical concerns regarding trustworthiness across dimensions. This paper presents a comprehensive framework to address these challenges through three key contributions. First, we systematically review global AI governance laws and policies from governments and regulatory bodies, a… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.14296v1-abstract-full').style.display = 'inline'; document.getElementById('2502.14296v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2502.14296v1-abstract-full" style="display: none;"> Generative Foundation Models (GenFMs) have emerged as transformative tools. However, their widespread adoption raises critical concerns regarding trustworthiness across dimensions. This paper presents a comprehensive framework to address these challenges through three key contributions. First, we systematically review global AI governance laws and policies from governments and regulatory bodies, as well as industry practices and standards. Based on this analysis, we propose a set of guiding principles for GenFMs, developed through extensive multidisciplinary collaboration that integrates technical, ethical, legal, and societal perspectives. Second, we introduce TrustGen, the first dynamic benchmarking platform designed to evaluate trustworthiness across multiple dimensions and model types, including text-to-image, large language, and vision-language models. TrustGen leverages modular components--metadata curation, test case generation, and contextual variation--to enable adaptive and iterative assessments, overcoming the limitations of static evaluation methods. Using TrustGen, we reveal significant progress in trustworthiness while identifying persistent challenges. Finally, we provide an in-depth discussion of the challenges and future directions for trustworthy GenFMs, which reveals the complex, evolving nature of trustworthiness, highlighting the nuanced trade-offs between utility and trustworthiness, and consideration for various downstream applications, identifying persistent challenges and providing a strategic roadmap for future research. This work establishes a holistic framework for advancing trustworthiness in GenAI, paving the way for safer and more responsible integration of GenFMs into critical applications. To facilitate advancement in the community, we release the toolkit for dynamic evaluation. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.14296v1-abstract-full').style.display = 'none'; document.getElementById('2502.14296v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 20 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2025. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2501.14249">arXiv:2501.14249</a> <span> [<a href="https://arxiv.org/pdf/2501.14249">pdf</a>, <a href="https://arxiv.org/format/2501.14249">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> </div> </div> <p class="title is-5 mathjax"> Humanity's Last Exam </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Phan%2C+L">Long Phan</a>, <a href="/search/cs?searchtype=author&query=Gatti%2C+A">Alice Gatti</a>, <a href="/search/cs?searchtype=author&query=Han%2C+Z">Ziwen Han</a>, <a href="/search/cs?searchtype=author&query=Li%2C+N">Nathaniel Li</a>, <a href="/search/cs?searchtype=author&query=Hu%2C+J">Josephina Hu</a>, <a href="/search/cs?searchtype=author&query=Zhang%2C+H">Hugh Zhang</a>, <a href="/search/cs?searchtype=author&query=Zhang%2C+C+B+C">Chen Bo Calvin Zhang</a>, <a href="/search/cs?searchtype=author&query=Shaaban%2C+M">Mohamed Shaaban</a>, <a href="/search/cs?searchtype=author&query=Ling%2C+J">John Ling</a>, <a href="/search/cs?searchtype=author&query=Shi%2C+S">Sean Shi</a>, <a href="/search/cs?searchtype=author&query=Choi%2C+M">Michael Choi</a>, <a href="/search/cs?searchtype=author&query=Agrawal%2C+A">Anish Agrawal</a>, <a href="/search/cs?searchtype=author&query=Chopra%2C+A">Arnav Chopra</a>, <a href="/search/cs?searchtype=author&query=Khoja%2C+A">Adam Khoja</a>, <a href="/search/cs?searchtype=author&query=Kim%2C+R">Ryan Kim</a>, <a href="/search/cs?searchtype=author&query=Ren%2C+R">Richard Ren</a>, <a href="/search/cs?searchtype=author&query=Hausenloy%2C+J">Jason Hausenloy</a>, <a href="/search/cs?searchtype=author&query=Zhang%2C+O">Oliver Zhang</a>, <a href="/search/cs?searchtype=author&query=Mazeika%2C+M">Mantas Mazeika</a>, <a href="/search/cs?searchtype=author&query=Nguyen%2C+T">Tung Nguyen</a>, <a href="/search/cs?searchtype=author&query=Anderson%2C+D">Daron Anderson</a>, <a href="/search/cs?searchtype=author&query=Shah%2C+I+A">Imad Ali Shah</a>, <a href="/search/cs?searchtype=author&query=Doroshenko%2C+M">Mikhail Doroshenko</a>, <a href="/search/cs?searchtype=author&query=Stokes%2C+A+C">Alun Cennyth Stokes</a>, <a href="/search/cs?searchtype=author&query=Mahmood%2C+M">Mobeen Mahmood</a> , et al. (709 additional authors not shown) </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2501.14249v5-abstract-short" style="display: inline;"> Benchmarks are important tools for tracking the rapid advancements in large language model (LLM) capabilities. However, benchmarks are not keeping pace in difficulty: LLMs now achieve over 90\% accuracy on popular benchmarks like MMLU, limiting informed measurement of state-of-the-art LLM capabilities. In response, we introduce Humanity's Last Exam (HLE), a multi-modal benchmark at the frontier of… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2501.14249v5-abstract-full').style.display = 'inline'; document.getElementById('2501.14249v5-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2501.14249v5-abstract-full" style="display: none;"> Benchmarks are important tools for tracking the rapid advancements in large language model (LLM) capabilities. However, benchmarks are not keeping pace in difficulty: LLMs now achieve over 90\% accuracy on popular benchmarks like MMLU, limiting informed measurement of state-of-the-art LLM capabilities. In response, we introduce Humanity's Last Exam (HLE), a multi-modal benchmark at the frontier of human knowledge, designed to be the final closed-ended academic benchmark of its kind with broad subject coverage. HLE consists of 2,700 questions across dozens of subjects, including mathematics, humanities, and the natural sciences. HLE is developed globally by subject-matter experts and consists of multiple-choice and short-answer questions suitable for automated grading. Each question has a known solution that is unambiguous and easily verifiable, but cannot be quickly answered via internet retrieval. State-of-the-art LLMs demonstrate low accuracy and calibration on HLE, highlighting a significant gap between current LLM capabilities and the expert human frontier on closed-ended academic questions. To inform research and policymaking upon a clear understanding of model capabilities, we publicly release HLE at https://lastexam.ai. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2501.14249v5-abstract-full').style.display = 'none'; document.getElementById('2501.14249v5-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 20 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 24 January, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> January 2025. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">27 pages, 6 figures</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2410.09024">arXiv:2410.09024</a> <span> [<a href="https://arxiv.org/pdf/2410.09024">pdf</a>, <a href="https://arxiv.org/format/2410.09024">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> </div> </div> <p class="title is-5 mathjax"> AgentHarm: A Benchmark for Measuring Harmfulness of LLM Agents </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Andriushchenko%2C+M">Maksym Andriushchenko</a>, <a href="/search/cs?searchtype=author&query=Souly%2C+A">Alexandra Souly</a>, <a href="/search/cs?searchtype=author&query=Dziemian%2C+M">Mateusz Dziemian</a>, <a href="/search/cs?searchtype=author&query=Duenas%2C+D">Derek Duenas</a>, <a href="/search/cs?searchtype=author&query=Lin%2C+M">Maxwell Lin</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+J">Justin Wang</a>, <a href="/search/cs?searchtype=author&query=Hendrycks%2C+D">Dan Hendrycks</a>, <a href="/search/cs?searchtype=author&query=Zou%2C+A">Andy Zou</a>, <a href="/search/cs?searchtype=author&query=Kolter%2C+Z">Zico Kolter</a>, <a href="/search/cs?searchtype=author&query=Fredrikson%2C+M">Matt Fredrikson</a>, <a href="/search/cs?searchtype=author&query=Winsor%2C+E">Eric Winsor</a>, <a href="/search/cs?searchtype=author&query=Wynne%2C+J">Jerome Wynne</a>, <a href="/search/cs?searchtype=author&query=Gal%2C+Y">Yarin Gal</a>, <a href="/search/cs?searchtype=author&query=Davies%2C+X">Xander Davies</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2410.09024v2-abstract-short" style="display: inline;"> The robustness of LLMs to jailbreak attacks, where users design prompts to circumvent safety measures and misuse model capabilities, has been studied primarily for LLMs acting as simple chatbots. Meanwhile, LLM agents -- which use external tools and can execute multi-stage tasks -- may pose a greater risk if misused, but their robustness remains underexplored. To facilitate research on LLM agent m… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.09024v2-abstract-full').style.display = 'inline'; document.getElementById('2410.09024v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2410.09024v2-abstract-full" style="display: none;"> The robustness of LLMs to jailbreak attacks, where users design prompts to circumvent safety measures and misuse model capabilities, has been studied primarily for LLMs acting as simple chatbots. Meanwhile, LLM agents -- which use external tools and can execute multi-stage tasks -- may pose a greater risk if misused, but their robustness remains underexplored. To facilitate research on LLM agent misuse, we propose a new benchmark called AgentHarm. The benchmark includes a diverse set of 110 explicitly malicious agent tasks (440 with augmentations), covering 11 harm categories including fraud, cybercrime, and harassment. In addition to measuring whether models refuse harmful agentic requests, scoring well on AgentHarm requires jailbroken agents to maintain their capabilities following an attack to complete a multi-step task. We evaluate a range of leading LLMs, and find (1) leading LLMs are surprisingly compliant with malicious agent requests without jailbreaking, (2) simple universal jailbreak templates can be adapted to effectively jailbreak agents, and (3) these jailbreaks enable coherent and malicious multi-step agent behavior and retain model capabilities. To enable simple and reliable evaluation of attacks and defenses for LLM-based agents, we publicly release AgentHarm at https://huggingface.co/datasets/ai-safety-institute/AgentHarm. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.09024v2-abstract-full').style.display = 'none'; document.getElementById('2410.09024v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 14 October, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 11 October, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2409.01281">arXiv:2409.01281</a> <span> [<a href="https://arxiv.org/pdf/2409.01281">pdf</a>, <a href="https://arxiv.org/format/2409.01281">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> </div> </div> <p class="title is-5 mathjax"> Path-Consistency: Prefix Enhancement for Efficient Inference in LLM </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Zhu%2C+J">Jiace Zhu</a>, <a href="/search/cs?searchtype=author&query=Shen%2C+Y">Yingtao Shen</a>, <a href="/search/cs?searchtype=author&query=Zhao%2C+J">Jie Zhao</a>, <a href="/search/cs?searchtype=author&query=Zou%2C+A">An Zou</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2409.01281v1-abstract-short" style="display: inline;"> To enhance the reasoning capabilities of large language models (LLMs), self-consistency has gained significant popularity by combining multiple sampling with majority voting. However, the state-of-the-art self-consistency approaches consume substantial computational resources and lead to significant additional time costs due to the multiple sampling. This prevents its full potential from being rea… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2409.01281v1-abstract-full').style.display = 'inline'; document.getElementById('2409.01281v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2409.01281v1-abstract-full" style="display: none;"> To enhance the reasoning capabilities of large language models (LLMs), self-consistency has gained significant popularity by combining multiple sampling with majority voting. However, the state-of-the-art self-consistency approaches consume substantial computational resources and lead to significant additional time costs due to the multiple sampling. This prevents its full potential from being realized in scenarios where computational resources are critical. To improve the inference efficiency, this paper introduces \textit{path-consistency}, a method that leverages the confidence of answers generated in earlier branches to identify the prefix of the most promising path. By dynamically guiding the generation of subsequent branches based on this prefix, the \textit{path-consistency} mitigates both the errors and redundancies from random or less useful sampling in self-consistency. As a result, it can significantly accelerate the inference process by reducing the number of tokens generated. Our extensive empirical evaluation shows that the \textit{path-consistency} achieves significant acceleration in inference latency ranging from $7.8\%$ to $40.5\%$, while maintaining or even improving task accuracy across different datasets, including mathematical reasoning, common sense reasoning, symbolic reasoning, and code generation. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2409.01281v1-abstract-full').style.display = 'none'; document.getElementById('2409.01281v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 24 August, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> September 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2408.00761">arXiv:2408.00761</a> <span> [<a href="https://arxiv.org/pdf/2408.00761">pdf</a>, <a href="https://arxiv.org/format/2408.00761">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> </div> </div> <p class="title is-5 mathjax"> Tamper-Resistant Safeguards for Open-Weight LLMs </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Tamirisa%2C+R">Rishub Tamirisa</a>, <a href="/search/cs?searchtype=author&query=Bharathi%2C+B">Bhrugu Bharathi</a>, <a href="/search/cs?searchtype=author&query=Phan%2C+L">Long Phan</a>, <a href="/search/cs?searchtype=author&query=Zhou%2C+A">Andy Zhou</a>, <a href="/search/cs?searchtype=author&query=Gatti%2C+A">Alice Gatti</a>, <a href="/search/cs?searchtype=author&query=Suresh%2C+T">Tarun Suresh</a>, <a href="/search/cs?searchtype=author&query=Lin%2C+M">Maxwell Lin</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+J">Justin Wang</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+R">Rowan Wang</a>, <a href="/search/cs?searchtype=author&query=Arel%2C+R">Ron Arel</a>, <a href="/search/cs?searchtype=author&query=Zou%2C+A">Andy Zou</a>, <a href="/search/cs?searchtype=author&query=Song%2C+D">Dawn Song</a>, <a href="/search/cs?searchtype=author&query=Li%2C+B">Bo Li</a>, <a href="/search/cs?searchtype=author&query=Hendrycks%2C+D">Dan Hendrycks</a>, <a href="/search/cs?searchtype=author&query=Mazeika%2C+M">Mantas Mazeika</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2408.00761v4-abstract-short" style="display: inline;"> Rapid advances in the capabilities of large language models (LLMs) have raised widespread concerns regarding their potential for malicious use. Open-weight LLMs present unique challenges, as existing safeguards lack robustness to tampering attacks that modify model weights. For example, recent works have demonstrated that refusal and unlearning safeguards can be trivially removed with a few steps… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2408.00761v4-abstract-full').style.display = 'inline'; document.getElementById('2408.00761v4-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2408.00761v4-abstract-full" style="display: none;"> Rapid advances in the capabilities of large language models (LLMs) have raised widespread concerns regarding their potential for malicious use. Open-weight LLMs present unique challenges, as existing safeguards lack robustness to tampering attacks that modify model weights. For example, recent works have demonstrated that refusal and unlearning safeguards can be trivially removed with a few steps of fine-tuning. These vulnerabilities necessitate new approaches for enabling the safe release of open-weight LLMs. We develop a method, called TAR, for building tamper-resistant safeguards into open-weight LLMs such that adversaries cannot remove the safeguards even after hundreds of steps of fine-tuning. In extensive evaluations and red teaming analyses, we find that our method greatly improves tamper-resistance while preserving benign capabilities. Our results demonstrate that progress on tamper-resistance is possible, opening up a promising new avenue to improve the safety and security of open-weight LLMs. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2408.00761v4-abstract-full').style.display = 'none'; document.getElementById('2408.00761v4-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 10 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 1 August, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> August 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Website: https://www.tamper-resistant-safeguards.com</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2407.10701">arXiv:2407.10701</a> <span> [<a href="https://arxiv.org/pdf/2407.10701">pdf</a>, <a href="https://arxiv.org/format/2407.10701">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> </div> </div> <p class="title is-5 mathjax"> DOCBENCH: A Benchmark for Evaluating LLM-based Document Reading Systems </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Zou%2C+A">Anni Zou</a>, <a href="/search/cs?searchtype=author&query=Yu%2C+W">Wenhao Yu</a>, <a href="/search/cs?searchtype=author&query=Zhang%2C+H">Hongming Zhang</a>, <a href="/search/cs?searchtype=author&query=Ma%2C+K">Kaixin Ma</a>, <a href="/search/cs?searchtype=author&query=Cai%2C+D">Deng Cai</a>, <a href="/search/cs?searchtype=author&query=Zhang%2C+Z">Zhuosheng Zhang</a>, <a href="/search/cs?searchtype=author&query=Zhao%2C+H">Hai Zhao</a>, <a href="/search/cs?searchtype=author&query=Yu%2C+D">Dong Yu</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2407.10701v1-abstract-short" style="display: inline;"> Recently, there has been a growing interest among large language model (LLM) developers in LLM-based document reading systems, which enable users to upload their own documents and pose questions related to the document contents, going beyond simple reading comprehension tasks. Consequently, these systems have been carefully designed to tackle challenges such as file parsing, metadata extraction, m… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2407.10701v1-abstract-full').style.display = 'inline'; document.getElementById('2407.10701v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2407.10701v1-abstract-full" style="display: none;"> Recently, there has been a growing interest among large language model (LLM) developers in LLM-based document reading systems, which enable users to upload their own documents and pose questions related to the document contents, going beyond simple reading comprehension tasks. Consequently, these systems have been carefully designed to tackle challenges such as file parsing, metadata extraction, multi-modal information understanding and long-context reading. However, no current benchmark exists to evaluate their performance in such scenarios, where a raw file and questions are provided as input, and a corresponding response is expected as output. In this paper, we introduce DocBench, a new benchmark designed to evaluate LLM-based document reading systems. Our benchmark involves a meticulously crafted process, including the recruitment of human annotators and the generation of synthetic questions. It includes 229 real documents and 1,102 questions, spanning across five different domains and four major types of questions. We evaluate both proprietary LLM-based systems accessible via web interfaces or APIs, and a parse-then-read pipeline employing open-source LLMs. Our evaluations reveal noticeable gaps between existing LLM-based document reading systems and human performance, underscoring the challenges of developing proficient systems. To summarize, DocBench aims to establish a standardized benchmark for evaluating LLM-based document reading systems under diverse real-world scenarios, thereby guiding future advancements in this research area. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2407.10701v1-abstract-full').style.display = 'none'; document.getElementById('2407.10701v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 15 July, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> July 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Work in progress</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2406.04313">arXiv:2406.04313</a> <span> [<a href="https://arxiv.org/pdf/2406.04313">pdf</a>, <a href="https://arxiv.org/format/2406.04313">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computers and Society">cs.CY</span> </div> </div> <p class="title is-5 mathjax"> Improving Alignment and Robustness with Circuit Breakers </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Zou%2C+A">Andy Zou</a>, <a href="/search/cs?searchtype=author&query=Phan%2C+L">Long Phan</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+J">Justin Wang</a>, <a href="/search/cs?searchtype=author&query=Duenas%2C+D">Derek Duenas</a>, <a href="/search/cs?searchtype=author&query=Lin%2C+M">Maxwell Lin</a>, <a href="/search/cs?searchtype=author&query=Andriushchenko%2C+M">Maksym Andriushchenko</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+R">Rowan Wang</a>, <a href="/search/cs?searchtype=author&query=Kolter%2C+Z">Zico Kolter</a>, <a href="/search/cs?searchtype=author&query=Fredrikson%2C+M">Matt Fredrikson</a>, <a href="/search/cs?searchtype=author&query=Hendrycks%2C+D">Dan Hendrycks</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2406.04313v4-abstract-short" style="display: inline;"> AI systems can take harmful actions and are highly vulnerable to adversarial attacks. We present an approach, inspired by recent advances in representation engineering, that interrupts the models as they respond with harmful outputs with "circuit breakers." Existing techniques aimed at improving alignment, such as refusal training, are often bypassed. Techniques such as adversarial training try to… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2406.04313v4-abstract-full').style.display = 'inline'; document.getElementById('2406.04313v4-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2406.04313v4-abstract-full" style="display: none;"> AI systems can take harmful actions and are highly vulnerable to adversarial attacks. We present an approach, inspired by recent advances in representation engineering, that interrupts the models as they respond with harmful outputs with "circuit breakers." Existing techniques aimed at improving alignment, such as refusal training, are often bypassed. Techniques such as adversarial training try to plug these holes by countering specific attacks. As an alternative to refusal training and adversarial training, circuit-breaking directly controls the representations that are responsible for harmful outputs in the first place. Our technique can be applied to both text-only and multimodal language models to prevent the generation of harmful outputs without sacrificing utility -- even in the presence of powerful unseen attacks. Notably, while adversarial robustness in standalone image recognition remains an open challenge, circuit breakers allow the larger multimodal system to reliably withstand image "hijacks" that aim to produce harmful content. Finally, we extend our approach to AI agents, demonstrating considerable reductions in the rate of harmful actions when they are under attack. Our approach represents a significant step forward in the development of reliable safeguards to harmful behavior and adversarial attacks. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2406.04313v4-abstract-full').style.display = 'none'; document.getElementById('2406.04313v4-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 12 July, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 6 June, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> June 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Code and models are available at https://github.com/GraySwanAI/circuit-breakers</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2405.14782">arXiv:2405.14782</a> <span> [<a href="https://arxiv.org/pdf/2405.14782">pdf</a>, <a href="https://arxiv.org/format/2405.14782">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> </div> </div> <p class="title is-5 mathjax"> Lessons from the Trenches on Reproducible Evaluation of Language Models </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Biderman%2C+S">Stella Biderman</a>, <a href="/search/cs?searchtype=author&query=Schoelkopf%2C+H">Hailey Schoelkopf</a>, <a href="/search/cs?searchtype=author&query=Sutawika%2C+L">Lintang Sutawika</a>, <a href="/search/cs?searchtype=author&query=Gao%2C+L">Leo Gao</a>, <a href="/search/cs?searchtype=author&query=Tow%2C+J">Jonathan Tow</a>, <a href="/search/cs?searchtype=author&query=Abbasi%2C+B">Baber Abbasi</a>, <a href="/search/cs?searchtype=author&query=Aji%2C+A+F">Alham Fikri Aji</a>, <a href="/search/cs?searchtype=author&query=Ammanamanchi%2C+P+S">Pawan Sasanka Ammanamanchi</a>, <a href="/search/cs?searchtype=author&query=Black%2C+S">Sidney Black</a>, <a href="/search/cs?searchtype=author&query=Clive%2C+J">Jordan Clive</a>, <a href="/search/cs?searchtype=author&query=DiPofi%2C+A">Anthony DiPofi</a>, <a href="/search/cs?searchtype=author&query=Etxaniz%2C+J">Julen Etxaniz</a>, <a href="/search/cs?searchtype=author&query=Fattori%2C+B">Benjamin Fattori</a>, <a href="/search/cs?searchtype=author&query=Forde%2C+J+Z">Jessica Zosa Forde</a>, <a href="/search/cs?searchtype=author&query=Foster%2C+C">Charles Foster</a>, <a href="/search/cs?searchtype=author&query=Hsu%2C+J">Jeffrey Hsu</a>, <a href="/search/cs?searchtype=author&query=Jaiswal%2C+M">Mimansa Jaiswal</a>, <a href="/search/cs?searchtype=author&query=Lee%2C+W+Y">Wilson Y. Lee</a>, <a href="/search/cs?searchtype=author&query=Li%2C+H">Haonan Li</a>, <a href="/search/cs?searchtype=author&query=Lovering%2C+C">Charles Lovering</a>, <a href="/search/cs?searchtype=author&query=Muennighoff%2C+N">Niklas Muennighoff</a>, <a href="/search/cs?searchtype=author&query=Pavlick%2C+E">Ellie Pavlick</a>, <a href="/search/cs?searchtype=author&query=Phang%2C+J">Jason Phang</a>, <a href="/search/cs?searchtype=author&query=Skowron%2C+A">Aviya Skowron</a>, <a href="/search/cs?searchtype=author&query=Tan%2C+S">Samson Tan</a> , et al. (5 additional authors not shown) </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2405.14782v2-abstract-short" style="display: inline;"> Effective evaluation of language models remains an open challenge in NLP. Researchers and engineers face methodological issues such as the sensitivity of models to evaluation setup, difficulty of proper comparisons across methods, and the lack of reproducibility and transparency. In this paper we draw on three years of experience in evaluating large language models to provide guidance and lessons… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2405.14782v2-abstract-full').style.display = 'inline'; document.getElementById('2405.14782v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2405.14782v2-abstract-full" style="display: none;"> Effective evaluation of language models remains an open challenge in NLP. Researchers and engineers face methodological issues such as the sensitivity of models to evaluation setup, difficulty of proper comparisons across methods, and the lack of reproducibility and transparency. In this paper we draw on three years of experience in evaluating large language models to provide guidance and lessons for researchers. First, we provide an overview of common challenges faced in language model evaluation. Second, we delineate best practices for addressing or lessening the impact of these challenges on research. Third, we present the Language Model Evaluation Harness (lm-eval): an open source library for independent, reproducible, and extensible evaluation of language models that seeks to address these issues. We describe the features of the library as well as case studies in which the library has been used to alleviate these methodological concerns. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2405.14782v2-abstract-full').style.display = 'none'; document.getElementById('2405.14782v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 29 May, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 23 May, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> May 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2405.10389">arXiv:2405.10389</a> <span> [<a href="https://arxiv.org/pdf/2405.10389">pdf</a>, <a href="https://arxiv.org/format/2405.10389">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Systems and Control">eess.SY</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> Physics-Informed Heterogeneous Graph Neural Networks for DC Blocker Placement </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Jin%2C+H">Hongwei Jin</a>, <a href="/search/cs?searchtype=author&query=Balaprakash%2C+P">Prasanna Balaprakash</a>, <a href="/search/cs?searchtype=author&query=Zou%2C+A">Allen Zou</a>, <a href="/search/cs?searchtype=author&query=Ghysels%2C+P">Pieter Ghysels</a>, <a href="/search/cs?searchtype=author&query=Krishnapriyan%2C+A+S">Aditi S. Krishnapriyan</a>, <a href="/search/cs?searchtype=author&query=Mate%2C+A">Adam Mate</a>, <a href="/search/cs?searchtype=author&query=Barnes%2C+A">Arthur Barnes</a>, <a href="/search/cs?searchtype=author&query=Bent%2C+R">Russell Bent</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2405.10389v1-abstract-short" style="display: inline;"> The threat of geomagnetic disturbances (GMDs) to the reliable operation of the bulk energy system has spurred the development of effective strategies for mitigating their impacts. One such approach involves placing transformer neutral blocking devices, which interrupt the path of geomagnetically induced currents (GICs) to limit their impact. The high cost of these devices and the sparsity of trans… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2405.10389v1-abstract-full').style.display = 'inline'; document.getElementById('2405.10389v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2405.10389v1-abstract-full" style="display: none;"> The threat of geomagnetic disturbances (GMDs) to the reliable operation of the bulk energy system has spurred the development of effective strategies for mitigating their impacts. One such approach involves placing transformer neutral blocking devices, which interrupt the path of geomagnetically induced currents (GICs) to limit their impact. The high cost of these devices and the sparsity of transformers that experience high GICs during GMD events, however, calls for a sparse placement strategy that involves high computational cost. To address this challenge, we developed a physics-informed heterogeneous graph neural network (PIHGNN) for solving the graph-based dc-blocker placement problem. Our approach combines a heterogeneous graph neural network (HGNN) with a physics-informed neural network (PINN) to capture the diverse types of nodes and edges in ac/dc networks and incorporates the physical laws of the power grid. We train the PIHGNN model using a surrogate power flow model and validate it using case studies. Results demonstrate that PIHGNN can effectively and efficiently support the deployment of GIC dc-current blockers, ensuring the continued supply of electricity to meet societal demands. Our approach has the potential to contribute to the development of more reliable and resilient power grids capable of withstanding the growing threat that GMDs pose. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2405.10389v1-abstract-full').style.display = 'none'; document.getElementById('2405.10389v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 16 May, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> May 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Paper is accepted by PSCC 2024</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2403.17447">arXiv:2403.17447</a> <span> [<a href="https://arxiv.org/pdf/2403.17447">pdf</a>, <a href="https://arxiv.org/format/2403.17447">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Neural and Evolutionary Computing">cs.NE</span> </div> </div> <p class="title is-5 mathjax"> Order of Compression: A Systematic and Optimal Sequence to Combinationally Compress CNN </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Shen%2C+Y">Yingtao Shen</a>, <a href="/search/cs?searchtype=author&query=Sun%2C+M">Minqing Sun</a>, <a href="/search/cs?searchtype=author&query=Lin%2C+J">Jianzhe Lin</a>, <a href="/search/cs?searchtype=author&query=Zhao%2C+J">Jie Zhao</a>, <a href="/search/cs?searchtype=author&query=Zou%2C+A">An Zou</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2403.17447v2-abstract-short" style="display: inline;"> Model compression has gained significant popularity as a means to alleviate the computational and memory demands of machine learning models. Each compression technique leverages unique features to reduce the size of neural networks. Although intuitively combining different techniques may enhance compression effectiveness, we find that the order in which they are combined significantly influences p… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2403.17447v2-abstract-full').style.display = 'inline'; document.getElementById('2403.17447v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2403.17447v2-abstract-full" style="display: none;"> Model compression has gained significant popularity as a means to alleviate the computational and memory demands of machine learning models. Each compression technique leverages unique features to reduce the size of neural networks. Although intuitively combining different techniques may enhance compression effectiveness, we find that the order in which they are combined significantly influences performance. To identify the optimal sequence for compressing neural networks, we propose the Order of Compression, a systematic and optimal sequence to apply multiple compression techniques in the most effective order. We start by building the foundations of the orders between any two compression approaches and then demonstrate inserting additional compression between any two compressions will not break the order of the two compression approaches. Based on the foundations, an optimal order is obtained with topological sorting. Validated on image-based regression and classification networks across different datasets, our proposed Order of Compression significantly reduces computational costs by up to 859 times on ResNet34, with negligible accuracy loss (-0.09% for CIFAR10) compared to the baseline model. We believe our simple yet effective exploration of the order of compression will shed light on the practice of model compression. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2403.17447v2-abstract-full').style.display = 'none'; document.getElementById('2403.17447v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 17 August, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 26 March, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> March 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">9 pages, 15 figures</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2403.03218">arXiv:2403.03218</a> <span> [<a href="https://arxiv.org/pdf/2403.03218">pdf</a>, <a href="https://arxiv.org/format/2403.03218">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computers and Society">cs.CY</span> </div> </div> <p class="title is-5 mathjax"> The WMDP Benchmark: Measuring and Reducing Malicious Use With Unlearning </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Li%2C+N">Nathaniel Li</a>, <a href="/search/cs?searchtype=author&query=Pan%2C+A">Alexander Pan</a>, <a href="/search/cs?searchtype=author&query=Gopal%2C+A">Anjali Gopal</a>, <a href="/search/cs?searchtype=author&query=Yue%2C+S">Summer Yue</a>, <a href="/search/cs?searchtype=author&query=Berrios%2C+D">Daniel Berrios</a>, <a href="/search/cs?searchtype=author&query=Gatti%2C+A">Alice Gatti</a>, <a href="/search/cs?searchtype=author&query=Li%2C+J+D">Justin D. Li</a>, <a href="/search/cs?searchtype=author&query=Dombrowski%2C+A">Ann-Kathrin Dombrowski</a>, <a href="/search/cs?searchtype=author&query=Goel%2C+S">Shashwat Goel</a>, <a href="/search/cs?searchtype=author&query=Phan%2C+L">Long Phan</a>, <a href="/search/cs?searchtype=author&query=Mukobi%2C+G">Gabriel Mukobi</a>, <a href="/search/cs?searchtype=author&query=Helm-Burger%2C+N">Nathan Helm-Burger</a>, <a href="/search/cs?searchtype=author&query=Lababidi%2C+R">Rassin Lababidi</a>, <a href="/search/cs?searchtype=author&query=Justen%2C+L">Lennart Justen</a>, <a href="/search/cs?searchtype=author&query=Liu%2C+A+B">Andrew B. Liu</a>, <a href="/search/cs?searchtype=author&query=Chen%2C+M">Michael Chen</a>, <a href="/search/cs?searchtype=author&query=Barrass%2C+I">Isabelle Barrass</a>, <a href="/search/cs?searchtype=author&query=Zhang%2C+O">Oliver Zhang</a>, <a href="/search/cs?searchtype=author&query=Zhu%2C+X">Xiaoyuan Zhu</a>, <a href="/search/cs?searchtype=author&query=Tamirisa%2C+R">Rishub Tamirisa</a>, <a href="/search/cs?searchtype=author&query=Bharathi%2C+B">Bhrugu Bharathi</a>, <a href="/search/cs?searchtype=author&query=Khoja%2C+A">Adam Khoja</a>, <a href="/search/cs?searchtype=author&query=Zhao%2C+Z">Zhenqi Zhao</a>, <a href="/search/cs?searchtype=author&query=Herbert-Voss%2C+A">Ariel Herbert-Voss</a>, <a href="/search/cs?searchtype=author&query=Breuer%2C+C+B">Cort B. Breuer</a> , et al. (32 additional authors not shown) </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2403.03218v7-abstract-short" style="display: inline;"> The White House Executive Order on Artificial Intelligence highlights the risks of large language models (LLMs) empowering malicious actors in developing biological, cyber, and chemical weapons. To measure these risks of malicious use, government institutions and major AI labs are developing evaluations for hazardous capabilities in LLMs. However, current evaluations are private, preventing furthe… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2403.03218v7-abstract-full').style.display = 'inline'; document.getElementById('2403.03218v7-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2403.03218v7-abstract-full" style="display: none;"> The White House Executive Order on Artificial Intelligence highlights the risks of large language models (LLMs) empowering malicious actors in developing biological, cyber, and chemical weapons. To measure these risks of malicious use, government institutions and major AI labs are developing evaluations for hazardous capabilities in LLMs. However, current evaluations are private, preventing further research into mitigating risk. Furthermore, they focus on only a few, highly specific pathways for malicious use. To fill these gaps, we publicly release the Weapons of Mass Destruction Proxy (WMDP) benchmark, a dataset of 3,668 multiple-choice questions that serve as a proxy measurement of hazardous knowledge in biosecurity, cybersecurity, and chemical security. WMDP was developed by a consortium of academics and technical consultants, and was stringently filtered to eliminate sensitive information prior to public release. WMDP serves two roles: first, as an evaluation for hazardous knowledge in LLMs, and second, as a benchmark for unlearning methods to remove such hazardous knowledge. To guide progress on unlearning, we develop RMU, a state-of-the-art unlearning method based on controlling model representations. RMU reduces model performance on WMDP while maintaining general capabilities in areas such as biology and computer science, suggesting that unlearning may be a concrete path towards reducing malicious use from LLMs. We release our benchmark and code publicly at https://wmdp.ai <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2403.03218v7-abstract-full').style.display = 'none'; document.getElementById('2403.03218v7-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 15 May, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 5 March, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> March 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">See the project page at https://wmdp.ai</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2402.04249">arXiv:2402.04249</a> <span> [<a href="https://arxiv.org/pdf/2402.04249">pdf</a>, <a href="https://arxiv.org/format/2402.04249">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> HarmBench: A Standardized Evaluation Framework for Automated Red Teaming and Robust Refusal </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Mazeika%2C+M">Mantas Mazeika</a>, <a href="/search/cs?searchtype=author&query=Phan%2C+L">Long Phan</a>, <a href="/search/cs?searchtype=author&query=Yin%2C+X">Xuwang Yin</a>, <a href="/search/cs?searchtype=author&query=Zou%2C+A">Andy Zou</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+Z">Zifan Wang</a>, <a href="/search/cs?searchtype=author&query=Mu%2C+N">Norman Mu</a>, <a href="/search/cs?searchtype=author&query=Sakhaee%2C+E">Elham Sakhaee</a>, <a href="/search/cs?searchtype=author&query=Li%2C+N">Nathaniel Li</a>, <a href="/search/cs?searchtype=author&query=Basart%2C+S">Steven Basart</a>, <a href="/search/cs?searchtype=author&query=Li%2C+B">Bo Li</a>, <a href="/search/cs?searchtype=author&query=Forsyth%2C+D">David Forsyth</a>, <a href="/search/cs?searchtype=author&query=Hendrycks%2C+D">Dan Hendrycks</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2402.04249v2-abstract-short" style="display: inline;"> Automated red teaming holds substantial promise for uncovering and mitigating the risks associated with the malicious use of large language models (LLMs), yet the field lacks a standardized evaluation framework to rigorously assess new methods. To address this issue, we introduce HarmBench, a standardized evaluation framework for automated red teaming. We identify several desirable properties prev… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2402.04249v2-abstract-full').style.display = 'inline'; document.getElementById('2402.04249v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2402.04249v2-abstract-full" style="display: none;"> Automated red teaming holds substantial promise for uncovering and mitigating the risks associated with the malicious use of large language models (LLMs), yet the field lacks a standardized evaluation framework to rigorously assess new methods. To address this issue, we introduce HarmBench, a standardized evaluation framework for automated red teaming. We identify several desirable properties previously unaccounted for in red teaming evaluations and systematically design HarmBench to meet these criteria. Using HarmBench, we conduct a large-scale comparison of 18 red teaming methods and 33 target LLMs and defenses, yielding novel insights. We also introduce a highly efficient adversarial training method that greatly enhances LLM robustness across a wide range of attacks, demonstrating how HarmBench enables codevelopment of attacks and defenses. We open source HarmBench at https://github.com/centerforaisafety/HarmBench. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2402.04249v2-abstract-full').style.display = 'none'; document.getElementById('2402.04249v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 26 February, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 6 February, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Website: https://www.harmbench.org</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2402.00395">arXiv:2402.00395</a> <span> [<a href="https://arxiv.org/pdf/2402.00395">pdf</a>, <a href="https://arxiv.org/format/2402.00395">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Hardware Architecture">cs.AR</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Signal Processing">eess.SP</span> </div> </div> <p class="title is-5 mathjax"> ONE-SA: Enabling Nonlinear Operations in Systolic Arrays for Efficient and Flexible Neural Network Inference </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Sun%2C+R">Ruiqi Sun</a>, <a href="/search/cs?searchtype=author&query=Ni%2C+Y">Yinchen Ni</a>, <a href="/search/cs?searchtype=author&query=He%2C+X">Xin He</a>, <a href="/search/cs?searchtype=author&query=Zhao%2C+J">Jie Zhao</a>, <a href="/search/cs?searchtype=author&query=Zou%2C+A">An Zou</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2402.00395v1-abstract-short" style="display: inline;"> The computation and memory-intensive nature of DNNs limits their use in many mobile and embedded contexts. Application-specific integrated circuit (ASIC) hardware accelerators employ matrix multiplication units (such as the systolic arrays) and dedicated nonlinear function units to speed up DNN computations. A close examination of these ASIC accelerators reveals that the designs are often speciali… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2402.00395v1-abstract-full').style.display = 'inline'; document.getElementById('2402.00395v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2402.00395v1-abstract-full" style="display: none;"> The computation and memory-intensive nature of DNNs limits their use in many mobile and embedded contexts. Application-specific integrated circuit (ASIC) hardware accelerators employ matrix multiplication units (such as the systolic arrays) and dedicated nonlinear function units to speed up DNN computations. A close examination of these ASIC accelerators reveals that the designs are often specialized and lack versatility across different networks, especially when the networks have different types of computation. In this paper, we introduce a novel systolic array architecture, which is capable of executing nonlinear functions. By encompassing both inherent linear and newly enabled nonlinear functions within the systolic arrays, the proposed architecture facilitates versatile network inferences, substantially enhancing computational power and energy efficiency. Experimental results show that employing this systolic array enables seamless execution of entire DNNs, incurring only a negligible loss in the network inference accuracy. Furthermore, assessment and evaluation with FPGAs reveal that integrating nonlinear computation capacity into a systolic array does not introduce extra notable (less than 1.5%) block memory memories (BRAMs), look-up-tables (LUTs), or digital signal processors (DSPs) but a mere 13.3% - 24.1% more flip flops (FFs). In comparison to existing methodologies, executing the networks with the proposed systolic array, which enables the flexibility of different network models, yields up to 25.73x, 5.21x, and 1.54x computational efficiency when compared to general-purpose CPUs, GPUs, and SoCs respectively, while achieving comparable (83.4% - 135.8%) performance with the conventional accelerators which are designed for specific neural network models. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2402.00395v1-abstract-full').style.display = 'none'; document.getElementById('2402.00395v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 1 February, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Accepted to DATE 2024</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2311.10537">arXiv:2311.10537</a> <span> [<a href="https://arxiv.org/pdf/2311.10537">pdf</a>, <a href="https://arxiv.org/format/2311.10537">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> </div> </div> <p class="title is-5 mathjax"> MedAgents: Large Language Models as Collaborators for Zero-shot Medical Reasoning </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Tang%2C+X">Xiangru Tang</a>, <a href="/search/cs?searchtype=author&query=Zou%2C+A">Anni Zou</a>, <a href="/search/cs?searchtype=author&query=Zhang%2C+Z">Zhuosheng Zhang</a>, <a href="/search/cs?searchtype=author&query=Li%2C+Z">Ziming Li</a>, <a href="/search/cs?searchtype=author&query=Zhao%2C+Y">Yilun Zhao</a>, <a href="/search/cs?searchtype=author&query=Zhang%2C+X">Xingyao Zhang</a>, <a href="/search/cs?searchtype=author&query=Cohan%2C+A">Arman Cohan</a>, <a href="/search/cs?searchtype=author&query=Gerstein%2C+M">Mark Gerstein</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2311.10537v4-abstract-short" style="display: inline;"> Large language models (LLMs), despite their remarkable progress across various general domains, encounter significant barriers in medicine and healthcare. This field faces unique challenges such as domain-specific terminologies and reasoning over specialized knowledge. To address these issues, we propose MedAgents, a novel multi-disciplinary collaboration framework for the medical domain. MedAgent… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2311.10537v4-abstract-full').style.display = 'inline'; document.getElementById('2311.10537v4-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2311.10537v4-abstract-full" style="display: none;"> Large language models (LLMs), despite their remarkable progress across various general domains, encounter significant barriers in medicine and healthcare. This field faces unique challenges such as domain-specific terminologies and reasoning over specialized knowledge. To address these issues, we propose MedAgents, a novel multi-disciplinary collaboration framework for the medical domain. MedAgents leverages LLM-based agents in a role-playing setting that participate in a collaborative multi-round discussion, thereby enhancing LLM proficiency and reasoning capabilities. This training-free framework encompasses five critical steps: gathering domain experts, proposing individual analyses, summarising these analyses into a report, iterating over discussions until a consensus is reached, and ultimately making a decision. Our work focuses on the zero-shot setting, which is applicable in real-world scenarios. Experimental results on nine datasets (MedQA, MedMCQA, PubMedQA, and six subtasks from MMLU) establish that our proposed MedAgents framework excels at mining and harnessing the medical expertise within LLMs, as well as extending its reasoning abilities. Our code can be found at https://github.com/gersteinlab/MedAgents. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2311.10537v4-abstract-full').style.display = 'none'; document.getElementById('2311.10537v4-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 4 June, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 16 November, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> November 2023. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2310.06692">arXiv:2310.06692</a> <span> [<a href="https://arxiv.org/pdf/2310.06692">pdf</a>, <a href="https://arxiv.org/format/2310.06692">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> </div> </div> <p class="title is-5 mathjax"> Generalizable Chain-of-Thought Prompting in Mixed-task Scenarios with Large Language Models </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Zou%2C+A">Anni Zou</a>, <a href="/search/cs?searchtype=author&query=Zhang%2C+Z">Zhuosheng Zhang</a>, <a href="/search/cs?searchtype=author&query=Zhao%2C+H">Hai Zhao</a>, <a href="/search/cs?searchtype=author&query=Tang%2C+X">Xiangru Tang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2310.06692v3-abstract-short" style="display: inline;"> Large language models (LLMs) have unveiled remarkable reasoning capabilities by exploiting chain-of-thought (CoT) prompting, which generates intermediate reasoning chains to serve as the rationale for deriving the answer. However, current CoT methods either simply employ general prompts such as Let's think step by step, or heavily rely on pre-defined task-specific demonstrations to attain preferab… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2310.06692v3-abstract-full').style.display = 'inline'; document.getElementById('2310.06692v3-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2310.06692v3-abstract-full" style="display: none;"> Large language models (LLMs) have unveiled remarkable reasoning capabilities by exploiting chain-of-thought (CoT) prompting, which generates intermediate reasoning chains to serve as the rationale for deriving the answer. However, current CoT methods either simply employ general prompts such as Let's think step by step, or heavily rely on pre-defined task-specific demonstrations to attain preferable performances, thereby engendering an inescapable gap between performance and generalization. To bridge this gap, we propose GeM-CoT, a Generalizable CoT prompting mechanism in Mixed-task scenarios where the type of input questions is unknown. GeM-CoT first categorizes the question type and subsequently samples or constructs demonstrations from the corresponding data pool in an automatic pattern. With this technical design, GeM-CoT simultaneously enjoys superior generalization capabilities and remarkable performances on 10 public reasoning tasks and 23 BBH tasks. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2310.06692v3-abstract-full').style.display = 'none'; document.getElementById('2310.06692v3-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 20 February, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 10 October, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2023. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">17 pages, 12 figures</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2310.01405">arXiv:2310.01405</a> <span> [<a href="https://arxiv.org/pdf/2310.01405">pdf</a>, <a href="https://arxiv.org/format/2310.01405">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computers and Society">cs.CY</span> </div> </div> <p class="title is-5 mathjax"> Representation Engineering: A Top-Down Approach to AI Transparency </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Zou%2C+A">Andy Zou</a>, <a href="/search/cs?searchtype=author&query=Phan%2C+L">Long Phan</a>, <a href="/search/cs?searchtype=author&query=Chen%2C+S">Sarah Chen</a>, <a href="/search/cs?searchtype=author&query=Campbell%2C+J">James Campbell</a>, <a href="/search/cs?searchtype=author&query=Guo%2C+P">Phillip Guo</a>, <a href="/search/cs?searchtype=author&query=Ren%2C+R">Richard Ren</a>, <a href="/search/cs?searchtype=author&query=Pan%2C+A">Alexander Pan</a>, <a href="/search/cs?searchtype=author&query=Yin%2C+X">Xuwang Yin</a>, <a href="/search/cs?searchtype=author&query=Mazeika%2C+M">Mantas Mazeika</a>, <a href="/search/cs?searchtype=author&query=Dombrowski%2C+A">Ann-Kathrin Dombrowski</a>, <a href="/search/cs?searchtype=author&query=Goel%2C+S">Shashwat Goel</a>, <a href="/search/cs?searchtype=author&query=Li%2C+N">Nathaniel Li</a>, <a href="/search/cs?searchtype=author&query=Byun%2C+M+J">Michael J. Byun</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+Z">Zifan Wang</a>, <a href="/search/cs?searchtype=author&query=Mallen%2C+A">Alex Mallen</a>, <a href="/search/cs?searchtype=author&query=Basart%2C+S">Steven Basart</a>, <a href="/search/cs?searchtype=author&query=Koyejo%2C+S">Sanmi Koyejo</a>, <a href="/search/cs?searchtype=author&query=Song%2C+D">Dawn Song</a>, <a href="/search/cs?searchtype=author&query=Fredrikson%2C+M">Matt Fredrikson</a>, <a href="/search/cs?searchtype=author&query=Kolter%2C+J+Z">J. Zico Kolter</a>, <a href="/search/cs?searchtype=author&query=Hendrycks%2C+D">Dan Hendrycks</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2310.01405v3-abstract-short" style="display: inline;"> In this paper, we identify and characterize the emerging area of representation engineering (RepE), an approach to enhancing the transparency of AI systems that draws on insights from cognitive neuroscience. RepE places population-level representations, rather than neurons or circuits, at the center of analysis, equipping us with novel methods for monitoring and manipulating high-level cognitive p… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2310.01405v3-abstract-full').style.display = 'inline'; document.getElementById('2310.01405v3-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2310.01405v3-abstract-full" style="display: none;"> In this paper, we identify and characterize the emerging area of representation engineering (RepE), an approach to enhancing the transparency of AI systems that draws on insights from cognitive neuroscience. RepE places population-level representations, rather than neurons or circuits, at the center of analysis, equipping us with novel methods for monitoring and manipulating high-level cognitive phenomena in deep neural networks (DNNs). We provide baselines and an initial analysis of RepE techniques, showing that they offer simple yet effective solutions for improving our understanding and control of large language models. We showcase how these methods can provide traction on a wide range of safety-relevant problems, including honesty, harmlessness, power-seeking, and more, demonstrating the promise of top-down transparency research. We hope that this work catalyzes further exploration of RepE and fosters advancements in the transparency and safety of AI systems. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2310.01405v3-abstract-full').style.display = 'none'; document.getElementById('2310.01405v3-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 10 October, 2023; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 2 October, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2023. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Code is available at https://github.com/andyzoujm/representation-engineering</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2307.15043">arXiv:2307.15043</a> <span> [<a href="https://arxiv.org/pdf/2307.15043">pdf</a>, <a href="https://arxiv.org/format/2307.15043">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Cryptography and Security">cs.CR</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> Universal and Transferable Adversarial Attacks on Aligned Language Models </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Zou%2C+A">Andy Zou</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+Z">Zifan Wang</a>, <a href="/search/cs?searchtype=author&query=Carlini%2C+N">Nicholas Carlini</a>, <a href="/search/cs?searchtype=author&query=Nasr%2C+M">Milad Nasr</a>, <a href="/search/cs?searchtype=author&query=Kolter%2C+J+Z">J. Zico Kolter</a>, <a href="/search/cs?searchtype=author&query=Fredrikson%2C+M">Matt Fredrikson</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2307.15043v2-abstract-short" style="display: inline;"> Because "out-of-the-box" large language models are capable of generating a great deal of objectionable content, recent work has focused on aligning these models in an attempt to prevent undesirable generation. While there has been some success at circumventing these measures -- so-called "jailbreaks" against LLMs -- these attacks have required significant human ingenuity and are brittle in practic… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2307.15043v2-abstract-full').style.display = 'inline'; document.getElementById('2307.15043v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2307.15043v2-abstract-full" style="display: none;"> Because "out-of-the-box" large language models are capable of generating a great deal of objectionable content, recent work has focused on aligning these models in an attempt to prevent undesirable generation. While there has been some success at circumventing these measures -- so-called "jailbreaks" against LLMs -- these attacks have required significant human ingenuity and are brittle in practice. In this paper, we propose a simple and effective attack method that causes aligned language models to generate objectionable behaviors. Specifically, our approach finds a suffix that, when attached to a wide range of queries for an LLM to produce objectionable content, aims to maximize the probability that the model produces an affirmative response (rather than refusing to answer). However, instead of relying on manual engineering, our approach automatically produces these adversarial suffixes by a combination of greedy and gradient-based search techniques, and also improves over past automatic prompt generation methods. Surprisingly, we find that the adversarial prompts generated by our approach are quite transferable, including to black-box, publicly released LLMs. Specifically, we train an adversarial attack suffix on multiple prompts (i.e., queries asking for many different types of objectionable content), as well as multiple models (in our case, Vicuna-7B and 13B). When doing so, the resulting attack suffix is able to induce objectionable content in the public interfaces to ChatGPT, Bard, and Claude, as well as open source LLMs such as LLaMA-2-Chat, Pythia, Falcon, and others. In total, this work significantly advances the state-of-the-art in adversarial attacks against aligned language models, raising important questions about how such systems can be prevented from producing objectionable information. Code is available at github.com/llm-attacks/llm-attacks. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2307.15043v2-abstract-full').style.display = 'none'; document.getElementById('2307.15043v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 20 December, 2023; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 27 July, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> July 2023. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Website: http://llm-attacks.org/</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2305.14405">arXiv:2305.14405</a> <span> [<a href="https://arxiv.org/pdf/2305.14405">pdf</a>, <a href="https://arxiv.org/format/2305.14405">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Hardware Architecture">cs.AR</span> </div> </div> <p class="title is-5 mathjax"> NeuralMatrix: Compute the Entire Neural Networks with Linear Matrix Operations for Efficient Inference </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Sun%2C+R">Ruiqi Sun</a>, <a href="/search/cs?searchtype=author&query=Ye%2C+S">Siwei Ye</a>, <a href="/search/cs?searchtype=author&query=Zhao%2C+J">Jie Zhao</a>, <a href="/search/cs?searchtype=author&query=He%2C+X">Xin He</a>, <a href="/search/cs?searchtype=author&query=Lin%2C+J">Jianzhe Lin</a>, <a href="/search/cs?searchtype=author&query=Li%2C+Y">Yiran Li</a>, <a href="/search/cs?searchtype=author&query=Zou%2C+A">An Zou</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2305.14405v4-abstract-short" style="display: inline;"> The inherent diversity of computation types within the deep neural network (DNN) models often requires a variety of specialized units in hardware processors, which limits computational efficiency, increasing both inference latency and power consumption, especially when the hardware processor needs to support and execute different neural networks. In this study, we introduce NeuralMatrix, which ela… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2305.14405v4-abstract-full').style.display = 'inline'; document.getElementById('2305.14405v4-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2305.14405v4-abstract-full" style="display: none;"> The inherent diversity of computation types within the deep neural network (DNN) models often requires a variety of specialized units in hardware processors, which limits computational efficiency, increasing both inference latency and power consumption, especially when the hardware processor needs to support and execute different neural networks. In this study, we introduce NeuralMatrix, which elastically transforms the computations of entire DNNs into linear matrix operations. This transformation allows seamless execution of various DNN models all with matrix operations and paves the way for running versatile DNN models with a single General Matrix Multiplication (GEMM) accelerator.Extensive experiments with both CNN and transformer-based models demonstrate the potential of NeuralMatrix to accurately and efficiently execute a wide range of DNN models, achieving 2.17-38.72 times computation efficiency (i.e., throughput per power) compared to CPUs, GPUs, and SoC platforms. This level of efficiency is usually only attainable with the accelerator designed for a specific neural network. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2305.14405v4-abstract-full').style.display = 'none'; document.getElementById('2305.14405v4-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 20 August, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 23 May, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> May 2023. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">9 pages, 8figures, Submitted to The 39th Annual AAAI Conference on Artificial Intelligence</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2305.05921">arXiv:2305.05921</a> <span> [<a href="https://arxiv.org/pdf/2305.05921">pdf</a>, <a href="https://arxiv.org/format/2305.05921">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> </div> </div> <p class="title is-5 mathjax"> Decker: Double Check with Heterogeneous Knowledge for Commonsense Fact Verification </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Zou%2C+A">Anni Zou</a>, <a href="/search/cs?searchtype=author&query=Zhang%2C+Z">Zhuosheng Zhang</a>, <a href="/search/cs?searchtype=author&query=Zhao%2C+H">Hai Zhao</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2305.05921v2-abstract-short" style="display: inline;"> Commonsense fact verification, as a challenging branch of commonsense question-answering (QA), aims to verify through facts whether a given commonsense claim is correct or not. Answering commonsense questions necessitates a combination of knowledge from various levels. However, existing studies primarily rest on grasping either unstructured evidence or potential reasoning paths from structured kno… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2305.05921v2-abstract-full').style.display = 'inline'; document.getElementById('2305.05921v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2305.05921v2-abstract-full" style="display: none;"> Commonsense fact verification, as a challenging branch of commonsense question-answering (QA), aims to verify through facts whether a given commonsense claim is correct or not. Answering commonsense questions necessitates a combination of knowledge from various levels. However, existing studies primarily rest on grasping either unstructured evidence or potential reasoning paths from structured knowledge bases, yet failing to exploit the benefits of heterogeneous knowledge simultaneously. In light of this, we propose Decker, a commonsense fact verification model that is capable of bridging heterogeneous knowledge by uncovering latent relationships between structured and unstructured knowledge. Experimental results on two commonsense fact verification benchmark datasets, CSQA2.0 and CREAK demonstrate the effectiveness of our Decker and further analysis verifies its capability to seize more precious information through reasoning. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2305.05921v2-abstract-full').style.display = 'none'; document.getElementById('2305.05921v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 27 May, 2023; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 10 May, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> May 2023. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Accepted to ACL 2023 Findings</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2304.07689">arXiv:2304.07689</a> <span> [<a href="https://arxiv.org/pdf/2304.07689">pdf</a>, <a href="https://arxiv.org/format/2304.07689">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Information Theory">cs.IT</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">stat.ML</span> </div> <div class="is-inline-block" style="margin-left: 0.5rem"> <div class="tags has-addons"> <span class="tag is-dark is-size-7">doi</span> <span class="tag is-light is-size-7"><a class="" href="https://doi.org/10.23919/FUSION52260.2023.10224080">10.23919/FUSION52260.2023.10224080 <i class="fa fa-external-link" aria-hidden="true"></i></a></span> </div> </div> </div> <p class="title is-5 mathjax"> Learning Empirical Bregman Divergence for Uncertain Distance Representation </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Li%2C+Z">Zhiyuan Li</a>, <a href="/search/cs?searchtype=author&query=Liu%2C+Z">Ziru Liu</a>, <a href="/search/cs?searchtype=author&query=Zou%2C+A">Anna Zou</a>, <a href="/search/cs?searchtype=author&query=Ralescu%2C+A+L">Anca L. Ralescu</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2304.07689v3-abstract-short" style="display: inline;"> Deep metric learning techniques have been used for visual representation in various supervised and unsupervised learning tasks through learning embeddings of samples with deep networks. However, classic approaches, which employ a fixed distance metric as a similarity function between two embeddings, may lead to suboptimal performance for capturing the complex data distribution. The Bregman diverge… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2304.07689v3-abstract-full').style.display = 'inline'; document.getElementById('2304.07689v3-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2304.07689v3-abstract-full" style="display: none;"> Deep metric learning techniques have been used for visual representation in various supervised and unsupervised learning tasks through learning embeddings of samples with deep networks. However, classic approaches, which employ a fixed distance metric as a similarity function between two embeddings, may lead to suboptimal performance for capturing the complex data distribution. The Bregman divergence generalizes measures of various distance metrics and arises throughout many fields of deep metric learning. In this paper, we first show how deep metric learning loss can arise from the Bregman divergence. We then introduce a novel method for learning empirical Bregman divergence directly from data based on parameterizing the convex function underlying the Bregman divergence with a deep learning setting. We further experimentally show that our approach performs effectively on five popular public datasets compared to other SOTA deep metric learning methods, particularly for pattern recognition problems. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2304.07689v3-abstract-full').style.display = 'none'; document.getElementById('2304.07689v3-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 15 May, 2023; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 16 April, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> April 2023. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Accepted by IEEE FUSION 2023</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2304.03279">arXiv:2304.03279</a> <span> [<a href="https://arxiv.org/pdf/2304.03279">pdf</a>, <a href="https://arxiv.org/format/2304.03279">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computers and Society">cs.CY</span> </div> </div> <p class="title is-5 mathjax"> Do the Rewards Justify the Means? Measuring Trade-Offs Between Rewards and Ethical Behavior in the MACHIAVELLI Benchmark </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Pan%2C+A">Alexander Pan</a>, <a href="/search/cs?searchtype=author&query=Chan%2C+J+S">Jun Shern Chan</a>, <a href="/search/cs?searchtype=author&query=Zou%2C+A">Andy Zou</a>, <a href="/search/cs?searchtype=author&query=Li%2C+N">Nathaniel Li</a>, <a href="/search/cs?searchtype=author&query=Basart%2C+S">Steven Basart</a>, <a href="/search/cs?searchtype=author&query=Woodside%2C+T">Thomas Woodside</a>, <a href="/search/cs?searchtype=author&query=Ng%2C+J">Jonathan Ng</a>, <a href="/search/cs?searchtype=author&query=Zhang%2C+H">Hanlin Zhang</a>, <a href="/search/cs?searchtype=author&query=Emmons%2C+S">Scott Emmons</a>, <a href="/search/cs?searchtype=author&query=Hendrycks%2C+D">Dan Hendrycks</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2304.03279v4-abstract-short" style="display: inline;"> Artificial agents have traditionally been trained to maximize reward, which may incentivize power-seeking and deception, analogous to how next-token prediction in language models (LMs) may incentivize toxicity. So do agents naturally learn to be Machiavellian? And how do we measure these behaviors in general-purpose models such as GPT-4? Towards answering these questions, we introduce MACHIAVELLI,… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2304.03279v4-abstract-full').style.display = 'inline'; document.getElementById('2304.03279v4-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2304.03279v4-abstract-full" style="display: none;"> Artificial agents have traditionally been trained to maximize reward, which may incentivize power-seeking and deception, analogous to how next-token prediction in language models (LMs) may incentivize toxicity. So do agents naturally learn to be Machiavellian? And how do we measure these behaviors in general-purpose models such as GPT-4? Towards answering these questions, we introduce MACHIAVELLI, a benchmark of 134 Choose-Your-Own-Adventure games containing over half a million rich, diverse scenarios that center on social decision-making. Scenario labeling is automated with LMs, which are more performant than human annotators. We mathematize dozens of harmful behaviors and use our annotations to evaluate agents' tendencies to be power-seeking, cause disutility, and commit ethical violations. We observe some tension between maximizing reward and behaving ethically. To improve this trade-off, we investigate LM-based methods to steer agents' towards less harmful behaviors. Our results show that agents can both act competently and morally, so concrete progress can currently be made in machine ethics--designing agents that are Pareto improvements in both safety and capabilities. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2304.03279v4-abstract-full').style.display = 'none'; document.getElementById('2304.03279v4-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 12 June, 2023; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 6 April, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> April 2023. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">ICML 2023 Oral (camera-ready); 31 pages, 5 figures</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2303.06189">arXiv:2303.06189</a> <span> [<a href="https://arxiv.org/pdf/2303.06189">pdf</a>, <a href="https://arxiv.org/format/2303.06189">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Distributed, Parallel, and Cluster Computing">cs.DC</span> </div> </div> <p class="title is-5 mathjax"> Papaya: Federated Learning, but Fully Decentralized </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Kripa%2C+R+M">Ram M Kripa</a>, <a href="/search/cs?searchtype=author&query=Zou%2C+A">Andy Zou</a>, <a href="/search/cs?searchtype=author&query=Jia%2C+R">Ryan Jia</a>, <a href="/search/cs?searchtype=author&query=Huang%2C+K">Kenny Huang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2303.06189v1-abstract-short" style="display: inline;"> Federated Learning systems use a centralized server to aggregate model updates. This is a bandwidth and resource-heavy constraint and exposes the system to privacy concerns. We instead implement a peer to peer learning system in which nodes train on their own data and periodically perform a weighted average of their parameters with that of their peers according to a learned trust matrix. So far, w… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2303.06189v1-abstract-full').style.display = 'inline'; document.getElementById('2303.06189v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2303.06189v1-abstract-full" style="display: none;"> Federated Learning systems use a centralized server to aggregate model updates. This is a bandwidth and resource-heavy constraint and exposes the system to privacy concerns. We instead implement a peer to peer learning system in which nodes train on their own data and periodically perform a weighted average of their parameters with that of their peers according to a learned trust matrix. So far, we have created a model client framework and have been using this to run experiments on the proposed system using multiple virtual nodes which in reality exist on the same computer. We used this strategy as stated in Iteration 1 of our proposal to prove the concept of peer to peer learning with shared parameters. We now hope to run more experiments and build a more deployable real world system for the same. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2303.06189v1-abstract-full').style.display = 'none'; document.getElementById('2303.06189v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 10 March, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> March 2023. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2301.12549">arXiv:2301.12549</a> <span> [<a href="https://arxiv.org/pdf/2301.12549">pdf</a>, <a href="https://arxiv.org/format/2301.12549">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> Unlocking Deterministic Robustness Certification on ImageNet </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Hu%2C+K">Kai Hu</a>, <a href="/search/cs?searchtype=author&query=Zou%2C+A">Andy Zou</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+Z">Zifan Wang</a>, <a href="/search/cs?searchtype=author&query=Leino%2C+K">Klas Leino</a>, <a href="/search/cs?searchtype=author&query=Fredrikson%2C+M">Matt Fredrikson</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2301.12549v3-abstract-short" style="display: inline;"> Despite the promise of Lipschitz-based methods for provably-robust deep learning with deterministic guarantees, current state-of-the-art results are limited to feed-forward Convolutional Networks (ConvNets) on low-dimensional data, such as CIFAR-10. This paper investigates strategies for expanding certifiably robust training to larger, deeper models. A key challenge in certifying deep networks is… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2301.12549v3-abstract-full').style.display = 'inline'; document.getElementById('2301.12549v3-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2301.12549v3-abstract-full" style="display: none;"> Despite the promise of Lipschitz-based methods for provably-robust deep learning with deterministic guarantees, current state-of-the-art results are limited to feed-forward Convolutional Networks (ConvNets) on low-dimensional data, such as CIFAR-10. This paper investigates strategies for expanding certifiably robust training to larger, deeper models. A key challenge in certifying deep networks is efficient calculation of the Lipschitz bound for residual blocks found in ResNet and ViT architectures. We show that fast ways of bounding the Lipschitz constant for conventional ResNets are loose, and show how to address this by designing a new residual block, leading to the \emph{Linear ResNet} (LiResNet) architecture. We then introduce \emph{Efficient Margin MAximization} (EMMA), a loss function that stabilizes robust training by simultaneously penalizing worst-case adversarial examples from \emph{all} classes. Together, these contributions yield new \emph{state-of-the-art} robust accuracy on CIFAR-10/100 and Tiny-ImageNet under $\ell_2$ perturbations. Moreover, for the first time, we are able to scale up fast deterministic robustness guarantees to ImageNet, demonstrating that this approach to robust learning can be applied to real-world applications. We release our code on Github: \url{https://github.com/klasleino/gloro}. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2301.12549v3-abstract-full').style.display = 'none'; document.getElementById('2301.12549v3-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 29 October, 2023; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 29 January, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> January 2023. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2210.10039">arXiv:2210.10039</a> <span> [<a href="https://arxiv.org/pdf/2210.10039">pdf</a>, <a href="https://arxiv.org/format/2210.10039">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computers and Society">cs.CY</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> How Would The Viewer Feel? Estimating Wellbeing From Video Scenarios </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Mazeika%2C+M">Mantas Mazeika</a>, <a href="/search/cs?searchtype=author&query=Tang%2C+E">Eric Tang</a>, <a href="/search/cs?searchtype=author&query=Zou%2C+A">Andy Zou</a>, <a href="/search/cs?searchtype=author&query=Basart%2C+S">Steven Basart</a>, <a href="/search/cs?searchtype=author&query=Chan%2C+J+S">Jun Shern Chan</a>, <a href="/search/cs?searchtype=author&query=Song%2C+D">Dawn Song</a>, <a href="/search/cs?searchtype=author&query=Forsyth%2C+D">David Forsyth</a>, <a href="/search/cs?searchtype=author&query=Steinhardt%2C+J">Jacob Steinhardt</a>, <a href="/search/cs?searchtype=author&query=Hendrycks%2C+D">Dan Hendrycks</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2210.10039v1-abstract-short" style="display: inline;"> In recent years, deep neural networks have demonstrated increasingly strong abilities to recognize objects and activities in videos. However, as video understanding becomes widely used in real-world applications, a key consideration is developing human-centric systems that understand not only the content of the video but also how it would affect the wellbeing and emotional state of viewers. To fac… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2210.10039v1-abstract-full').style.display = 'inline'; document.getElementById('2210.10039v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2210.10039v1-abstract-full" style="display: none;"> In recent years, deep neural networks have demonstrated increasingly strong abilities to recognize objects and activities in videos. However, as video understanding becomes widely used in real-world applications, a key consideration is developing human-centric systems that understand not only the content of the video but also how it would affect the wellbeing and emotional state of viewers. To facilitate research in this setting, we introduce two large-scale datasets with over 60,000 videos manually annotated for emotional response and subjective wellbeing. The Video Cognitive Empathy (VCE) dataset contains annotations for distributions of fine-grained emotional responses, allowing models to gain a detailed understanding of affective states. The Video to Valence (V2V) dataset contains annotations of relative pleasantness between videos, which enables predicting a continuous spectrum of wellbeing. In experiments, we show how video models that are primarily trained to recognize actions and find contours of objects can be repurposed to understand human preferences and the emotional content of videos. Although there is room for improvement, predicting wellbeing and emotional response is on the horizon for state-of-the-art models. We hope our datasets can help foster further advances at the intersection of commonsense video understanding and human preference learning. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2210.10039v1-abstract-full').style.display = 'none'; document.getElementById('2210.10039v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 18 October, 2022; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2022. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">NeurIPS 2022; datasets available at https://github.com/hendrycks/emodiversity/</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2206.15474">arXiv:2206.15474</a> <span> [<a href="https://arxiv.org/pdf/2206.15474">pdf</a>, <a href="https://arxiv.org/format/2206.15474">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> </div> </div> <p class="title is-5 mathjax"> Forecasting Future World Events with Neural Networks </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Zou%2C+A">Andy Zou</a>, <a href="/search/cs?searchtype=author&query=Xiao%2C+T">Tristan Xiao</a>, <a href="/search/cs?searchtype=author&query=Jia%2C+R">Ryan Jia</a>, <a href="/search/cs?searchtype=author&query=Kwon%2C+J">Joe Kwon</a>, <a href="/search/cs?searchtype=author&query=Mazeika%2C+M">Mantas Mazeika</a>, <a href="/search/cs?searchtype=author&query=Li%2C+R">Richard Li</a>, <a href="/search/cs?searchtype=author&query=Song%2C+D">Dawn Song</a>, <a href="/search/cs?searchtype=author&query=Steinhardt%2C+J">Jacob Steinhardt</a>, <a href="/search/cs?searchtype=author&query=Evans%2C+O">Owain Evans</a>, <a href="/search/cs?searchtype=author&query=Hendrycks%2C+D">Dan Hendrycks</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2206.15474v2-abstract-short" style="display: inline;"> Forecasting future world events is a challenging but valuable task. Forecasts of climate, geopolitical conflict, pandemics and economic indicators help shape policy and decision making. In these domains, the judgment of expert humans contributes to the best forecasts. Given advances in language modeling, can these forecasts be automated? To this end, we introduce Autocast, a dataset containing tho… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2206.15474v2-abstract-full').style.display = 'inline'; document.getElementById('2206.15474v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2206.15474v2-abstract-full" style="display: none;"> Forecasting future world events is a challenging but valuable task. Forecasts of climate, geopolitical conflict, pandemics and economic indicators help shape policy and decision making. In these domains, the judgment of expert humans contributes to the best forecasts. Given advances in language modeling, can these forecasts be automated? To this end, we introduce Autocast, a dataset containing thousands of forecasting questions and an accompanying news corpus. Questions are taken from forecasting tournaments, ensuring high quality, real-world importance, and diversity. The news corpus is organized by date, allowing us to precisely simulate the conditions under which humans made past forecasts (avoiding leakage from the future). Motivated by the difficulty of forecasting numbers across orders of magnitude (e.g. global cases of COVID-19 in 2022), we also curate IntervalQA, a dataset of numerical questions and metrics for calibration. We test language models on our forecasting task and find that performance is far below a human expert baseline. However, performance improves with increased model size and incorporation of relevant information from the news corpus. In sum, Autocast poses a novel challenge for large language models and improved performance could bring large practical benefits. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2206.15474v2-abstract-full').style.display = 'none'; document.getElementById('2206.15474v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 9 October, 2022; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 30 June, 2022; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> June 2022. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">NeurIPS 2022; our dataset is available at https://github.com/andyzoujm/autocast</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2206.04685">arXiv:2206.04685</a> <span> [<a href="https://arxiv.org/pdf/2206.04685">pdf</a>, <a href="https://arxiv.org/format/2206.04685">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Hardware Architecture">cs.AR</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Neural and Evolutionary Computing">cs.NE</span> </div> </div> <p class="title is-5 mathjax"> Predictive Exit: Prediction of Fine-Grained Early Exits for Computation- and Energy-Efficient Inference </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Li%2C+X">Xiangjie Li</a>, <a href="/search/cs?searchtype=author&query=Lou%2C+C">Chenfei Lou</a>, <a href="/search/cs?searchtype=author&query=Zhu%2C+Z">Zhengping Zhu</a>, <a href="/search/cs?searchtype=author&query=Chen%2C+Y">Yuchi Chen</a>, <a href="/search/cs?searchtype=author&query=Shen%2C+Y">Yingtao Shen</a>, <a href="/search/cs?searchtype=author&query=Ma%2C+Y">Yehan Ma</a>, <a href="/search/cs?searchtype=author&query=Zou%2C+A">An Zou</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2206.04685v2-abstract-short" style="display: inline;"> By adding exiting layers to the deep learning networks, early exit can terminate the inference earlier with accurate results. The passive decision-making of whether to exit or continue the next layer has to go through every pre-placed exiting layer until it exits. In addition, it is also hard to adjust the configurations of the computing platforms alongside the inference proceeds. By incorporating… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2206.04685v2-abstract-full').style.display = 'inline'; document.getElementById('2206.04685v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2206.04685v2-abstract-full" style="display: none;"> By adding exiting layers to the deep learning networks, early exit can terminate the inference earlier with accurate results. The passive decision-making of whether to exit or continue the next layer has to go through every pre-placed exiting layer until it exits. In addition, it is also hard to adjust the configurations of the computing platforms alongside the inference proceeds. By incorporating a low-cost prediction engine, we propose a Predictive Exit framework for computation- and energy-efficient deep learning applications. Predictive Exit can forecast where the network will exit (i.e., establish the number of remaining layers to finish the inference), which effectively reduces the network computation cost by exiting on time without running every pre-placed exiting layer. Moreover, according to the number of remaining layers, proper computing configurations (i.e., frequency and voltage) are selected to execute the network to further save energy. Extensive experimental results demonstrate that Predictive Exit achieves up to 96.2% computation reduction and 72.9% energy-saving compared with classic deep learning networks; and 12.8% computation reduction and 37.6% energy-saving compared with the early exit under state-of-the-art exiting strategies, given the same inference accuracy and latency. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2206.04685v2-abstract-full').style.display = 'none'; document.getElementById('2206.04685v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 28 December, 2022; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 9 June, 2022; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> June 2022. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2206.04615">arXiv:2206.04615</a> <span> [<a href="https://arxiv.org/pdf/2206.04615">pdf</a>, <a href="https://arxiv.org/format/2206.04615">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computers and Society">cs.CY</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">stat.ML</span> </div> </div> <p class="title is-5 mathjax"> Beyond the Imitation Game: Quantifying and extrapolating the capabilities of language models </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Srivastava%2C+A">Aarohi Srivastava</a>, <a href="/search/cs?searchtype=author&query=Rastogi%2C+A">Abhinav Rastogi</a>, <a href="/search/cs?searchtype=author&query=Rao%2C+A">Abhishek Rao</a>, <a href="/search/cs?searchtype=author&query=Shoeb%2C+A+A+M">Abu Awal Md Shoeb</a>, <a href="/search/cs?searchtype=author&query=Abid%2C+A">Abubakar Abid</a>, <a href="/search/cs?searchtype=author&query=Fisch%2C+A">Adam Fisch</a>, <a href="/search/cs?searchtype=author&query=Brown%2C+A+R">Adam R. Brown</a>, <a href="/search/cs?searchtype=author&query=Santoro%2C+A">Adam Santoro</a>, <a href="/search/cs?searchtype=author&query=Gupta%2C+A">Aditya Gupta</a>, <a href="/search/cs?searchtype=author&query=Garriga-Alonso%2C+A">Adri脿 Garriga-Alonso</a>, <a href="/search/cs?searchtype=author&query=Kluska%2C+A">Agnieszka Kluska</a>, <a href="/search/cs?searchtype=author&query=Lewkowycz%2C+A">Aitor Lewkowycz</a>, <a href="/search/cs?searchtype=author&query=Agarwal%2C+A">Akshat Agarwal</a>, <a href="/search/cs?searchtype=author&query=Power%2C+A">Alethea Power</a>, <a href="/search/cs?searchtype=author&query=Ray%2C+A">Alex Ray</a>, <a href="/search/cs?searchtype=author&query=Warstadt%2C+A">Alex Warstadt</a>, <a href="/search/cs?searchtype=author&query=Kocurek%2C+A+W">Alexander W. Kocurek</a>, <a href="/search/cs?searchtype=author&query=Safaya%2C+A">Ali Safaya</a>, <a href="/search/cs?searchtype=author&query=Tazarv%2C+A">Ali Tazarv</a>, <a href="/search/cs?searchtype=author&query=Xiang%2C+A">Alice Xiang</a>, <a href="/search/cs?searchtype=author&query=Parrish%2C+A">Alicia Parrish</a>, <a href="/search/cs?searchtype=author&query=Nie%2C+A">Allen Nie</a>, <a href="/search/cs?searchtype=author&query=Hussain%2C+A">Aman Hussain</a>, <a href="/search/cs?searchtype=author&query=Askell%2C+A">Amanda Askell</a>, <a href="/search/cs?searchtype=author&query=Dsouza%2C+A">Amanda Dsouza</a> , et al. (426 additional authors not shown) </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2206.04615v3-abstract-short" style="display: inline;"> Language models demonstrate both quantitative improvement and new qualitative capabilities with increasing scale. Despite their potentially transformative impact, these new capabilities are as yet poorly characterized. In order to inform future research, prepare for disruptive new model capabilities, and ameliorate socially harmful effects, it is vital that we understand the present and near-futur… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2206.04615v3-abstract-full').style.display = 'inline'; document.getElementById('2206.04615v3-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2206.04615v3-abstract-full" style="display: none;"> Language models demonstrate both quantitative improvement and new qualitative capabilities with increasing scale. Despite their potentially transformative impact, these new capabilities are as yet poorly characterized. In order to inform future research, prepare for disruptive new model capabilities, and ameliorate socially harmful effects, it is vital that we understand the present and near-future capabilities and limitations of language models. To address this challenge, we introduce the Beyond the Imitation Game benchmark (BIG-bench). BIG-bench currently consists of 204 tasks, contributed by 450 authors across 132 institutions. Task topics are diverse, drawing problems from linguistics, childhood development, math, common-sense reasoning, biology, physics, social bias, software development, and beyond. BIG-bench focuses on tasks that are believed to be beyond the capabilities of current language models. We evaluate the behavior of OpenAI's GPT models, Google-internal dense transformer architectures, and Switch-style sparse transformers on BIG-bench, across model sizes spanning millions to hundreds of billions of parameters. In addition, a team of human expert raters performed all tasks in order to provide a strong baseline. Findings include: model performance and calibration both improve with scale, but are poor in absolute terms (and when compared with rater performance); performance is remarkably similar across model classes, though with benefits from sparsity; tasks that improve gradually and predictably commonly involve a large knowledge or memorization component, whereas tasks that exhibit "breakthrough" behavior at a critical scale often involve multiple steps or components, or brittle metrics; social bias typically increases with scale in settings with ambiguous context, but this can be improved with prompting. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2206.04615v3-abstract-full').style.display = 'none'; document.getElementById('2206.04615v3-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 12 June, 2023; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 9 June, 2022; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> June 2022. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">27 pages, 17 figures + references and appendices, repo: https://github.com/google/BIG-bench</span> </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Journal ref:</span> Transactions on Machine Learning Research, May/2022, https://openreview.net/forum?id=uyTL5Bvosj </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2203.13262">arXiv:2203.13262</a> <span> [<a href="https://arxiv.org/pdf/2203.13262">pdf</a>, <a href="https://arxiv.org/ps/2203.13262">ps</a>, <a href="https://arxiv.org/format/2203.13262">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Neural and Evolutionary Computing">cs.NE</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Neurons and Cognition">q-bio.NC</span> </div> </div> <p class="title is-5 mathjax"> Interpretability of Neural Network With Physiological Mechanisms </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Zou%2C+A">Anna Zou</a>, <a href="/search/cs?searchtype=author&query=Li%2C+Z">Zhiyuan Li</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2203.13262v2-abstract-short" style="display: inline;"> Deep learning continues to play as a powerful state-of-art technique that has achieved extraordinary accuracy levels in various domains of regression and classification tasks, including images, video, signal, and natural language data. The original goal of proposing the neural network model is to improve the understanding of complex human brains using a mathematical expression approach. However, r… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2203.13262v2-abstract-full').style.display = 'inline'; document.getElementById('2203.13262v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2203.13262v2-abstract-full" style="display: none;"> Deep learning continues to play as a powerful state-of-art technique that has achieved extraordinary accuracy levels in various domains of regression and classification tasks, including images, video, signal, and natural language data. The original goal of proposing the neural network model is to improve the understanding of complex human brains using a mathematical expression approach. However, recent deep learning techniques continue to lose the interpretations of its functional process by being treated mostly as a black-box approximator. To address this issue, such an AI model needs to be biological and physiological realistic to incorporate a better understanding of human-machine evolutionary intelligence. In this study, we compare neural networks and biological circuits to discover the similarities and differences from various perspective views. We further discuss the insights into how neural networks learn from data by investigating human biological behaviors and understandable justifications. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2203.13262v2-abstract-full').style.display = 'none'; document.getElementById('2203.13262v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 2 June, 2022; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 24 March, 2022; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> March 2022. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Updated a new version</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2112.05135">arXiv:2112.05135</a> <span> [<a href="https://arxiv.org/pdf/2112.05135">pdf</a>, <a href="https://arxiv.org/format/2112.05135">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> PixMix: Dreamlike Pictures Comprehensively Improve Safety Measures </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Hendrycks%2C+D">Dan Hendrycks</a>, <a href="/search/cs?searchtype=author&query=Zou%2C+A">Andy Zou</a>, <a href="/search/cs?searchtype=author&query=Mazeika%2C+M">Mantas Mazeika</a>, <a href="/search/cs?searchtype=author&query=Tang%2C+L">Leonard Tang</a>, <a href="/search/cs?searchtype=author&query=Li%2C+B">Bo Li</a>, <a href="/search/cs?searchtype=author&query=Song%2C+D">Dawn Song</a>, <a href="/search/cs?searchtype=author&query=Steinhardt%2C+J">Jacob Steinhardt</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2112.05135v3-abstract-short" style="display: inline;"> In real-world applications of machine learning, reliable and safe systems must consider measures of performance beyond standard test set accuracy. These other goals include out-of-distribution (OOD) robustness, prediction consistency, resilience to adversaries, calibrated uncertainty estimates, and the ability to detect anomalous inputs. However, improving performance towards these goals is often… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2112.05135v3-abstract-full').style.display = 'inline'; document.getElementById('2112.05135v3-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2112.05135v3-abstract-full" style="display: none;"> In real-world applications of machine learning, reliable and safe systems must consider measures of performance beyond standard test set accuracy. These other goals include out-of-distribution (OOD) robustness, prediction consistency, resilience to adversaries, calibrated uncertainty estimates, and the ability to detect anomalous inputs. However, improving performance towards these goals is often a balancing act that today's methods cannot achieve without sacrificing performance on other safety axes. For instance, adversarial training improves adversarial robustness but sharply degrades other classifier performance metrics. Similarly, strong data augmentation and regularization techniques often improve OOD robustness but harm anomaly detection, raising the question of whether a Pareto improvement on all existing safety measures is possible. To meet this challenge, we design a new data augmentation strategy utilizing the natural structural complexity of pictures such as fractals, which outperforms numerous baselines, is near Pareto-optimal, and roundly improves safety measures. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2112.05135v3-abstract-full').style.display = 'none'; document.getElementById('2112.05135v3-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 29 March, 2022; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 9 December, 2021; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> December 2021. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">CVPR 2022. Code and models are available at https://github.com/andyzoujm/pixmix</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2110.13136">arXiv:2110.13136</a> <span> [<a href="https://arxiv.org/pdf/2110.13136">pdf</a>, <a href="https://arxiv.org/format/2110.13136">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computers and Society">cs.CY</span> </div> </div> <p class="title is-5 mathjax"> What Would Jiminy Cricket Do? Towards Agents That Behave Morally </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Hendrycks%2C+D">Dan Hendrycks</a>, <a href="/search/cs?searchtype=author&query=Mazeika%2C+M">Mantas Mazeika</a>, <a href="/search/cs?searchtype=author&query=Zou%2C+A">Andy Zou</a>, <a href="/search/cs?searchtype=author&query=Patel%2C+S">Sahil Patel</a>, <a href="/search/cs?searchtype=author&query=Zhu%2C+C">Christine Zhu</a>, <a href="/search/cs?searchtype=author&query=Navarro%2C+J">Jesus Navarro</a>, <a href="/search/cs?searchtype=author&query=Song%2C+D">Dawn Song</a>, <a href="/search/cs?searchtype=author&query=Li%2C+B">Bo Li</a>, <a href="/search/cs?searchtype=author&query=Steinhardt%2C+J">Jacob Steinhardt</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2110.13136v2-abstract-short" style="display: inline;"> When making everyday decisions, people are guided by their conscience, an internal sense of right and wrong. By contrast, artificial agents are currently not endowed with a moral sense. As a consequence, they may learn to behave immorally when trained on environments that ignore moral concerns, such as violent video games. With the advent of generally capable agents that pretrain on many environme… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2110.13136v2-abstract-full').style.display = 'inline'; document.getElementById('2110.13136v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2110.13136v2-abstract-full" style="display: none;"> When making everyday decisions, people are guided by their conscience, an internal sense of right and wrong. By contrast, artificial agents are currently not endowed with a moral sense. As a consequence, they may learn to behave immorally when trained on environments that ignore moral concerns, such as violent video games. With the advent of generally capable agents that pretrain on many environments, it will become necessary to mitigate inherited biases from environments that teach immoral behavior. To facilitate the development of agents that avoid causing wanton harm, we introduce Jiminy Cricket, an environment suite of 25 text-based adventure games with thousands of diverse, morally salient scenarios. By annotating every possible game state, the Jiminy Cricket environments robustly evaluate whether agents can act morally while maximizing reward. Using models with commonsense moral knowledge, we create an elementary artificial conscience that assesses and guides agents. In extensive experiments, we find that the artificial conscience approach can steer agents towards moral behavior without sacrificing performance. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2110.13136v2-abstract-full').style.display = 'none'; document.getElementById('2110.13136v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 7 February, 2022; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 25 October, 2021; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2021. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">NeurIPS 2021. Environments available here https://github.com/hendrycks/jiminy-cricket</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2101.10463">arXiv:2101.10463</a> <span> [<a href="https://arxiv.org/pdf/2101.10463">pdf</a>, <a href="https://arxiv.org/format/2101.10463">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Distributed, Parallel, and Cluster Computing">cs.DC</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Hardware Architecture">cs.AR</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Graphics">cs.GR</span> </div> </div> <p class="title is-5 mathjax"> RTGPU: Real-Time GPU Scheduling of Hard Deadline Parallel Tasks with Fine-Grain Utilization </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Zou%2C+A">An Zou</a>, <a href="/search/cs?searchtype=author&query=Li%2C+J">Jing Li</a>, <a href="/search/cs?searchtype=author&query=Gill%2C+C+D">Christopher D. Gill</a>, <a href="/search/cs?searchtype=author&query=Zhang%2C+X">Xuan Zhang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2101.10463v3-abstract-short" style="display: inline;"> Many emerging cyber-physical systems, such as autonomous vehicles and robots, rely heavily on artificial intelligence and machine learning algorithms to perform important system operations. Since these highly parallel applications are computationally intensive, they need to be accelerated by graphics processing units (GPUs) to meet stringent timing constraints. However, despite the wide adoption o… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2101.10463v3-abstract-full').style.display = 'inline'; document.getElementById('2101.10463v3-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2101.10463v3-abstract-full" style="display: none;"> Many emerging cyber-physical systems, such as autonomous vehicles and robots, rely heavily on artificial intelligence and machine learning algorithms to perform important system operations. Since these highly parallel applications are computationally intensive, they need to be accelerated by graphics processing units (GPUs) to meet stringent timing constraints. However, despite the wide adoption of GPUs, efficiently scheduling multiple GPU applications while providing rigorous real-time guarantees remains a challenge. In this paper, we propose RTGPU, which can schedule the execution of multiple GPU applications in real-time to meet hard deadlines. Each GPU application can have multiple CPU execution and memory copy segments, as well as GPU kernels. We start with a model to explicitly account for the CPU and memory copy segments of these applications. We then consider the GPU architecture in the development of a precise timing model for the GPU kernels and leverage a technique known as persistent threads to implement fine-grained kernel scheduling with improved performance through interleaved execution. Next, we propose a general method for scheduling parallel GPU applications in real time. Finally, to schedule multiple parallel GPU applications, we propose a practical real-time scheduling algorithm based on federated scheduling and grid search (for GPU kernel segments) with uniprocessor fixed priority scheduling (for multiple CPU and memory copy segments). Our approach provides superior schedulability compared with previous work, and gives real-time guarantees to meet hard deadlines for multiple GPU applications according to comprehensive validation and evaluation on a real NVIDIA GTX1080Ti GPU system. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2101.10463v3-abstract-full').style.display = 'none'; document.getElementById('2101.10463v3-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 6 February, 2023; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 25 January, 2021; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> January 2021. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2009.03300">arXiv:2009.03300</a> <span> [<a href="https://arxiv.org/pdf/2009.03300">pdf</a>, <a href="https://arxiv.org/format/2009.03300">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computers and Society">cs.CY</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> Measuring Massive Multitask Language Understanding </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Hendrycks%2C+D">Dan Hendrycks</a>, <a href="/search/cs?searchtype=author&query=Burns%2C+C">Collin Burns</a>, <a href="/search/cs?searchtype=author&query=Basart%2C+S">Steven Basart</a>, <a href="/search/cs?searchtype=author&query=Zou%2C+A">Andy Zou</a>, <a href="/search/cs?searchtype=author&query=Mazeika%2C+M">Mantas Mazeika</a>, <a href="/search/cs?searchtype=author&query=Song%2C+D">Dawn Song</a>, <a href="/search/cs?searchtype=author&query=Steinhardt%2C+J">Jacob Steinhardt</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2009.03300v3-abstract-short" style="display: inline;"> We propose a new test to measure a text model's multitask accuracy. The test covers 57 tasks including elementary mathematics, US history, computer science, law, and more. To attain high accuracy on this test, models must possess extensive world knowledge and problem solving ability. We find that while most recent models have near random-chance accuracy, the very largest GPT-3 model improves over… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2009.03300v3-abstract-full').style.display = 'inline'; document.getElementById('2009.03300v3-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2009.03300v3-abstract-full" style="display: none;"> We propose a new test to measure a text model's multitask accuracy. The test covers 57 tasks including elementary mathematics, US history, computer science, law, and more. To attain high accuracy on this test, models must possess extensive world knowledge and problem solving ability. We find that while most recent models have near random-chance accuracy, the very largest GPT-3 model improves over random chance by almost 20 percentage points on average. However, on every one of the 57 tasks, the best models still need substantial improvements before they can reach expert-level accuracy. Models also have lopsided performance and frequently do not know when they are wrong. Worse, they still have near-random accuracy on some socially important subjects such as morality and law. By comprehensively evaluating the breadth and depth of a model's academic and professional understanding, our test can be used to analyze models across many tasks and to identify important shortcomings. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2009.03300v3-abstract-full').style.display = 'none'; document.getElementById('2009.03300v3-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 12 January, 2021; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 7 September, 2020; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> September 2020. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">ICLR 2021; the test and code is available at https://github.com/hendrycks/test</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/1911.11132">arXiv:1911.11132</a> <span> [<a href="https://arxiv.org/pdf/1911.11132">pdf</a>, <a href="https://arxiv.org/format/1911.11132">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> Scaling Out-of-Distribution Detection for Real-World Settings </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Hendrycks%2C+D">Dan Hendrycks</a>, <a href="/search/cs?searchtype=author&query=Basart%2C+S">Steven Basart</a>, <a href="/search/cs?searchtype=author&query=Mazeika%2C+M">Mantas Mazeika</a>, <a href="/search/cs?searchtype=author&query=Zou%2C+A">Andy Zou</a>, <a href="/search/cs?searchtype=author&query=Kwon%2C+J">Joe Kwon</a>, <a href="/search/cs?searchtype=author&query=Mostajabi%2C+M">Mohammadreza Mostajabi</a>, <a href="/search/cs?searchtype=author&query=Steinhardt%2C+J">Jacob Steinhardt</a>, <a href="/search/cs?searchtype=author&query=Song%2C+D">Dawn Song</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="1911.11132v4-abstract-short" style="display: inline;"> Detecting out-of-distribution examples is important for safety-critical machine learning applications such as detecting novel biological phenomena and self-driving cars. However, existing research mainly focuses on simple small-scale settings. To set the stage for more realistic out-of-distribution detection, we depart from small-scale settings and explore large-scale multiclass and multi-label se… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('1911.11132v4-abstract-full').style.display = 'inline'; document.getElementById('1911.11132v4-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="1911.11132v4-abstract-full" style="display: none;"> Detecting out-of-distribution examples is important for safety-critical machine learning applications such as detecting novel biological phenomena and self-driving cars. However, existing research mainly focuses on simple small-scale settings. To set the stage for more realistic out-of-distribution detection, we depart from small-scale settings and explore large-scale multiclass and multi-label settings with high-resolution images and thousands of classes. To make future work in real-world settings possible, we create new benchmarks for three large-scale settings. To test ImageNet multiclass anomaly detectors, we introduce the Species dataset containing over 700,000 images and over a thousand anomalous species. We leverage ImageNet-21K to evaluate PASCAL VOC and COCO multilabel anomaly detectors. Third, we introduce a new benchmark for anomaly segmentation by introducing a segmentation benchmark with road anomalies. We conduct extensive experiments in these more realistic settings for out-of-distribution detection and find that a surprisingly simple detector based on the maximum logit outperforms prior methods in all the large-scale multi-class, multi-label, and segmentation tasks, establishing a simple new baseline for future work. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('1911.11132v4-abstract-full').style.display = 'none'; document.getElementById('1911.11132v4-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 15 May, 2022; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 25 November, 2019; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> November 2019. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">ICML 2022; The Species dataset and code are available at https://github.com/hendrycks/anomaly-seg</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/1910.14627">arXiv:1910.14627</a> <span> [<a href="https://arxiv.org/pdf/1910.14627">pdf</a>, <a href="https://arxiv.org/format/1910.14627">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Neural and Evolutionary Computing">cs.NE</span> </div> </div> <p class="title is-5 mathjax"> An Automatic Design Framework of Swarm Pattern Formation based on Multi-objective Genetic Programming </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Fan%2C+Z">Zhun Fan</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+Z">Zhaojun Wang</a>, <a href="/search/cs?searchtype=author&query=Zhu%2C+X">Xiaomin Zhu</a>, <a href="/search/cs?searchtype=author&query=Hu%2C+B">Bingliang Hu</a>, <a href="/search/cs?searchtype=author&query=Zou%2C+A">Anmin Zou</a>, <a href="/search/cs?searchtype=author&query=Bao%2C+D">Dongwei Bao</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="1910.14627v2-abstract-short" style="display: inline;"> Most existing swarm pattern formation methods depend on a predefined gene regulatory network (GRN) structure that requires designers' priori knowledge, which is difficult to adapt to complex and changeable environments. To dynamically adapt to the complex and changeable environments, we propose an automatic design framework of swarm pattern formation based on multi-objective genetic programming. T… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('1910.14627v2-abstract-full').style.display = 'inline'; document.getElementById('1910.14627v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="1910.14627v2-abstract-full" style="display: none;"> Most existing swarm pattern formation methods depend on a predefined gene regulatory network (GRN) structure that requires designers' priori knowledge, which is difficult to adapt to complex and changeable environments. To dynamically adapt to the complex and changeable environments, we propose an automatic design framework of swarm pattern formation based on multi-objective genetic programming. The proposed framework does not need to define the structure of the GRN-based model in advance, and it applies some basic network motifs to automatically structure the GRN-based model. In addition, a multi-objective genetic programming (MOGP) combines with NSGA-II, namely MOGP-NSGA-II, to balance the complexity and accuracy of the GRN-based model. In evolutionary process, an MOGP-NSGA-II and differential evolution (DE) are applied to optimize the structures and parameters of the GRN-based model in parallel. Simulation results demonstrate that the proposed framework can effectively evolve some novel GRN-based models, and these GRN-based models not only have a simpler structure and a better performance, but also are robust to the complex and changeable environments. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('1910.14627v2-abstract-full').style.display = 'none'; document.getElementById('1910.14627v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 1 November, 2019; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 31 October, 2019; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2019. </p> </li> </ol> <div class="is-hidden-tablet"> <!-- feedback for mobile only --> <span class="help" style="display: inline-block;"><a href="https://github.com/arXiv/arxiv-search/releases">Search v0.5.6 released 2020-02-24</a> </span> </div> </div> </main> <footer> <div class="columns is-desktop" role="navigation" aria-label="Secondary"> <!-- MetaColumn 1 --> <div class="column"> <div class="columns"> <div class="column"> <ul class="nav-spaced"> <li><a href="https://info.arxiv.org/about">About</a></li> <li><a href="https://info.arxiv.org/help">Help</a></li> </ul> </div> <div class="column"> <ul class="nav-spaced"> <li> <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 512 512" class="icon filter-black" role="presentation"><title>contact arXiv</title><desc>Click here to contact arXiv</desc><path d="M502.3 190.8c3.9-3.1 9.7-.2 9.7 4.7V400c0 26.5-21.5 48-48 48H48c-26.5 0-48-21.5-48-48V195.6c0-5 5.7-7.8 9.7-4.7 22.4 17.4 52.1 39.5 154.1 113.6 21.1 15.4 56.7 47.8 92.2 47.6 35.7.3 72-32.8 92.3-47.6 102-74.1 131.6-96.3 154-113.7zM256 320c23.2.4 56.6-29.2 73.4-41.4 132.7-96.3 142.8-104.7 173.4-128.7 5.8-4.5 9.2-11.5 9.2-18.9v-19c0-26.5-21.5-48-48-48H48C21.5 64 0 85.5 0 112v19c0 7.4 3.4 14.3 9.2 18.9 30.6 23.9 40.7 32.4 173.4 128.7 16.8 12.2 50.2 41.8 73.4 41.4z"/></svg> <a href="https://info.arxiv.org/help/contact.html"> Contact</a> </li> <li> <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 512 512" class="icon filter-black" role="presentation"><title>subscribe to arXiv mailings</title><desc>Click here to subscribe</desc><path d="M476 3.2L12.5 270.6c-18.1 10.4-15.8 35.6 2.2 43.2L121 358.4l287.3-253.2c5.5-4.9 13.3 2.6 8.6 8.3L176 407v80.5c0 23.6 28.5 32.9 42.5 15.8L282 426l124.6 52.2c14.2 6 30.4-2.9 33-18.2l72-432C515 7.8 493.3-6.8 476 3.2z"/></svg> <a href="https://info.arxiv.org/help/subscribe"> Subscribe</a> </li> </ul> </div> </div> </div> <!-- end MetaColumn 1 --> <!-- MetaColumn 2 --> <div class="column"> <div class="columns"> <div class="column"> <ul class="nav-spaced"> <li><a href="https://info.arxiv.org/help/license/index.html">Copyright</a></li> <li><a href="https://info.arxiv.org/help/policies/privacy_policy.html">Privacy Policy</a></li> </ul> </div> <div class="column sorry-app-links"> <ul class="nav-spaced"> <li><a href="https://info.arxiv.org/help/web_accessibility.html">Web Accessibility Assistance</a></li> <li> <p class="help"> <a class="a11y-main-link" href="https://status.arxiv.org" target="_blank">arXiv Operational Status <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 256 512" class="icon filter-dark_grey" role="presentation"><path d="M224.3 273l-136 136c-9.4 9.4-24.6 9.4-33.9 0l-22.6-22.6c-9.4-9.4-9.4-24.6 0-33.9l96.4-96.4-96.4-96.4c-9.4-9.4-9.4-24.6 0-33.9L54.3 103c9.4-9.4 24.6-9.4 33.9 0l136 136c9.5 9.4 9.5 24.6.1 34z"/></svg></a><br> Get status notifications via <a class="is-link" href="https://subscribe.sorryapp.com/24846f03/email/new" target="_blank"><svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 512 512" class="icon filter-black" role="presentation"><path d="M502.3 190.8c3.9-3.1 9.7-.2 9.7 4.7V400c0 26.5-21.5 48-48 48H48c-26.5 0-48-21.5-48-48V195.6c0-5 5.7-7.8 9.7-4.7 22.4 17.4 52.1 39.5 154.1 113.6 21.1 15.4 56.7 47.8 92.2 47.6 35.7.3 72-32.8 92.3-47.6 102-74.1 131.6-96.3 154-113.7zM256 320c23.2.4 56.6-29.2 73.4-41.4 132.7-96.3 142.8-104.7 173.4-128.7 5.8-4.5 9.2-11.5 9.2-18.9v-19c0-26.5-21.5-48-48-48H48C21.5 64 0 85.5 0 112v19c0 7.4 3.4 14.3 9.2 18.9 30.6 23.9 40.7 32.4 173.4 128.7 16.8 12.2 50.2 41.8 73.4 41.4z"/></svg>email</a> or <a class="is-link" href="https://subscribe.sorryapp.com/24846f03/slack/new" target="_blank"><svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 448 512" class="icon filter-black" role="presentation"><path d="M94.12 315.1c0 25.9-21.16 47.06-47.06 47.06S0 341 0 315.1c0-25.9 21.16-47.06 47.06-47.06h47.06v47.06zm23.72 0c0-25.9 21.16-47.06 47.06-47.06s47.06 21.16 47.06 47.06v117.84c0 25.9-21.16 47.06-47.06 47.06s-47.06-21.16-47.06-47.06V315.1zm47.06-188.98c-25.9 0-47.06-21.16-47.06-47.06S139 32 164.9 32s47.06 21.16 47.06 47.06v47.06H164.9zm0 23.72c25.9 0 47.06 21.16 47.06 47.06s-21.16 47.06-47.06 47.06H47.06C21.16 243.96 0 222.8 0 196.9s21.16-47.06 47.06-47.06H164.9zm188.98 47.06c0-25.9 21.16-47.06 47.06-47.06 25.9 0 47.06 21.16 47.06 47.06s-21.16 47.06-47.06 47.06h-47.06V196.9zm-23.72 0c0 25.9-21.16 47.06-47.06 47.06-25.9 0-47.06-21.16-47.06-47.06V79.06c0-25.9 21.16-47.06 47.06-47.06 25.9 0 47.06 21.16 47.06 47.06V196.9zM283.1 385.88c25.9 0 47.06 21.16 47.06 47.06 0 25.9-21.16 47.06-47.06 47.06-25.9 0-47.06-21.16-47.06-47.06v-47.06h47.06zm0-23.72c-25.9 0-47.06-21.16-47.06-47.06 0-25.9 21.16-47.06 47.06-47.06h117.84c25.9 0 47.06 21.16 47.06 47.06 0 25.9-21.16 47.06-47.06 47.06H283.1z"/></svg>slack</a> </p> </li> </ul> </div> </div> </div> <!-- end MetaColumn 2 --> </div> </footer> <script src="https://static.arxiv.org/static/base/1.0.0a5/js/member_acknowledgement.js"></script> </body> </html>