CINXE.COM

Search | arXiv e-print repository

<!DOCTYPE html> <html lang="en"> <head> <meta charset="utf-8"/> <meta name="viewport" content="width=device-width, initial-scale=1"/> <!-- new favicon config and versions by realfavicongenerator.net --> <link rel="apple-touch-icon" sizes="180x180" href="https://static.arxiv.org/static/base/1.0.0a5/images/icons/apple-touch-icon.png"> <link rel="icon" type="image/png" sizes="32x32" href="https://static.arxiv.org/static/base/1.0.0a5/images/icons/favicon-32x32.png"> <link rel="icon" type="image/png" sizes="16x16" href="https://static.arxiv.org/static/base/1.0.0a5/images/icons/favicon-16x16.png"> <link rel="manifest" href="https://static.arxiv.org/static/base/1.0.0a5/images/icons/site.webmanifest"> <link rel="mask-icon" href="https://static.arxiv.org/static/base/1.0.0a5/images/icons/safari-pinned-tab.svg" color="#b31b1b"> <link rel="shortcut icon" href="https://static.arxiv.org/static/base/1.0.0a5/images/icons/favicon.ico"> <meta name="msapplication-TileColor" content="#b31b1b"> <meta name="msapplication-config" content="images/icons/browserconfig.xml"> <meta name="theme-color" content="#b31b1b"> <!-- end favicon config --> <title>Search | arXiv e-print repository</title> <script defer src="https://static.arxiv.org/static/base/1.0.0a5/fontawesome-free-5.11.2-web/js/all.js"></script> <link rel="stylesheet" href="https://static.arxiv.org/static/base/1.0.0a5/css/arxivstyle.css" /> <script type="text/x-mathjax-config"> MathJax.Hub.Config({ messageStyle: "none", extensions: ["tex2jax.js"], jax: ["input/TeX", "output/HTML-CSS"], tex2jax: { inlineMath: [ ['$','$'], ["\\(","\\)"] ], displayMath: [ ['$$','$$'], ["\\[","\\]"] ], processEscapes: true, ignoreClass: '.*', processClass: 'mathjax.*' }, TeX: { extensions: ["AMSmath.js", "AMSsymbols.js", "noErrors.js"], noErrors: { inlineDelimiters: ["$","$"], multiLine: false, style: { "font-size": "normal", "border": "" } } }, "HTML-CSS": { availableFonts: ["TeX"] } }); </script> <script src='//static.arxiv.org/MathJax-2.7.3/MathJax.js'></script> <script src="https://static.arxiv.org/static/base/1.0.0a5/js/notification.js"></script> <link rel="stylesheet" href="https://static.arxiv.org/static/search/0.5.6/css/bulma-tooltip.min.css" /> <link rel="stylesheet" href="https://static.arxiv.org/static/search/0.5.6/css/search.css" /> <script src="https://code.jquery.com/jquery-3.2.1.slim.min.js" integrity="sha256-k2WSCIexGzOj3Euiig+TlR8gA0EmPjuc79OEeY5L45g=" crossorigin="anonymous"></script> <script src="https://static.arxiv.org/static/search/0.5.6/js/fieldset.js"></script> <style> radio#cf-customfield_11400 { display: none; } </style> </head> <body> <header><a href="#main-container" class="is-sr-only">Skip to main content</a> <!-- contains Cornell logo and sponsor statement --> <div class="attribution level is-marginless" role="banner"> <div class="level-left"> <a class="level-item" href="https://cornell.edu/"><img src="https://static.arxiv.org/static/base/1.0.0a5/images/cornell-reduced-white-SMALL.svg" alt="Cornell University" width="200" aria-label="logo" /></a> </div> <div class="level-right is-marginless"><p class="sponsors level-item is-marginless"><span id="support-ack-url">We gratefully acknowledge support from<br /> the Simons Foundation, <a href="https://info.arxiv.org/about/ourmembers.html">member institutions</a>, and all contributors. <a href="https://info.arxiv.org/about/donate.html">Donate</a></span></p></div> </div> <!-- contains arXiv identity and search bar --> <div class="identity level is-marginless"> <div class="level-left"> <div class="level-item"> <a class="arxiv" href="https://arxiv.org/" aria-label="arxiv-logo"> <img src="https://static.arxiv.org/static/base/1.0.0a5/images/arxiv-logo-one-color-white.svg" aria-label="logo" alt="arxiv logo" width="85" style="width:85px;"/> </a> </div> </div> <div class="search-block level-right"> <form class="level-item mini-search" method="GET" action="https://arxiv.org/search"> <div class="field has-addons"> <div class="control"> <input class="input is-small" type="text" name="query" placeholder="Search..." aria-label="Search term or terms" /> <p class="help"><a href="https://info.arxiv.org/help">Help</a> | <a href="https://arxiv.org/search/advanced">Advanced Search</a></p> </div> <div class="control"> <div class="select is-small"> <select name="searchtype" aria-label="Field to search"> <option value="all" selected="selected">All fields</option> <option value="title">Title</option> <option value="author">Author</option> <option value="abstract">Abstract</option> <option value="comments">Comments</option> <option value="journal_ref">Journal reference</option> <option value="acm_class">ACM classification</option> <option value="msc_class">MSC classification</option> <option value="report_num">Report number</option> <option value="paper_id">arXiv identifier</option> <option value="doi">DOI</option> <option value="orcid">ORCID</option> <option value="author_id">arXiv author ID</option> <option value="help">Help pages</option> <option value="full_text">Full text</option> </select> </div> </div> <input type="hidden" name="source" value="header"> <button class="button is-small is-cul-darker">Search</button> </div> </form> </div> </div> <!-- closes identity --> <div class="container"> <div class="user-tools is-size-7 has-text-right has-text-weight-bold" role="navigation" aria-label="User menu"> <a href="https://arxiv.org/login">Login</a> </div> </div> </header> <main class="container" id="main-container"> <div class="level is-marginless"> <div class="level-left"> <h1 class="title is-clearfix"> Showing 1&ndash;50 of 76 results for author: <span class="mathjax">Talwalkar, A</span> </h1> </div> <div class="level-right is-hidden-mobile"> <!-- feedback for mobile is moved to footer --> <span class="help" style="display: inline-block;"><a href="https://github.com/arXiv/arxiv-search/releases">Search v0.5.6 released 2020-02-24</a>&nbsp;&nbsp;</span> </div> </div> <div class="content"> <form method="GET" action="/search/cs" aria-role="search"> Searching in archive <strong>cs</strong>. <a href="/search/?searchtype=author&amp;query=Talwalkar%2C+A">Search in all archives.</a> <div class="field has-addons-tablet"> <div class="control is-expanded"> <label for="query" class="hidden-label">Search term or terms</label> <input class="input is-medium" id="query" name="query" placeholder="Search term..." type="text" value="Talwalkar, A"> </div> <div class="select control is-medium"> <label class="is-hidden" for="searchtype">Field</label> <select class="is-medium" id="searchtype" name="searchtype"><option value="all">All fields</option><option value="title">Title</option><option selected value="author">Author(s)</option><option value="abstract">Abstract</option><option value="comments">Comments</option><option value="journal_ref">Journal reference</option><option value="acm_class">ACM classification</option><option value="msc_class">MSC classification</option><option value="report_num">Report number</option><option value="paper_id">arXiv identifier</option><option value="doi">DOI</option><option value="orcid">ORCID</option><option value="license">License (URI)</option><option value="author_id">arXiv author ID</option><option value="help">Help pages</option><option value="full_text">Full text</option></select> </div> <div class="control"> <button class="button is-link is-medium">Search</button> </div> </div> <div class="field"> <div class="control is-size-7"> <label class="radio"> <input checked id="abstracts-0" name="abstracts" type="radio" value="show"> Show abstracts </label> <label class="radio"> <input id="abstracts-1" name="abstracts" type="radio" value="hide"> Hide abstracts </label> </div> </div> <div class="is-clearfix" style="height: 2.5em"> <div class="is-pulled-right"> <a href="/search/advanced?terms-0-term=Talwalkar%2C+A&amp;terms-0-field=author&amp;size=50&amp;order=-announced_date_first">Advanced Search</a> </div> </div> <input type="hidden" name="order" value="-announced_date_first"> <input type="hidden" name="size" value="50"> </form> <div class="level breathe-horizontal"> <div class="level-left"> <form method="GET" action="/search/"> <div style="display: none;"> <select id="searchtype" name="searchtype"><option value="all">All fields</option><option value="title">Title</option><option selected value="author">Author(s)</option><option value="abstract">Abstract</option><option value="comments">Comments</option><option value="journal_ref">Journal reference</option><option value="acm_class">ACM classification</option><option value="msc_class">MSC classification</option><option value="report_num">Report number</option><option value="paper_id">arXiv identifier</option><option value="doi">DOI</option><option value="orcid">ORCID</option><option value="license">License (URI)</option><option value="author_id">arXiv author ID</option><option value="help">Help pages</option><option value="full_text">Full text</option></select> <input id="query" name="query" type="text" value="Talwalkar, A"> <ul id="abstracts"><li><input checked id="abstracts-0" name="abstracts" type="radio" value="show"> <label for="abstracts-0">Show abstracts</label></li><li><input id="abstracts-1" name="abstracts" type="radio" value="hide"> <label for="abstracts-1">Hide abstracts</label></li></ul> </div> <div class="box field is-grouped is-grouped-multiline level-item"> <div class="control"> <span class="select is-small"> <select id="size" name="size"><option value="25">25</option><option selected value="50">50</option><option value="100">100</option><option value="200">200</option></select> </span> <label for="size">results per page</label>. </div> <div class="control"> <label for="order">Sort results by</label> <span class="select is-small"> <select id="order" name="order"><option selected value="-announced_date_first">Announcement date (newest first)</option><option value="announced_date_first">Announcement date (oldest first)</option><option value="-submitted_date">Submission date (newest first)</option><option value="submitted_date">Submission date (oldest first)</option><option value="">Relevance</option></select> </span> </div> <div class="control"> <button class="button is-small is-link">Go</button> </div> </div> </form> </div> </div> <nav class="pagination is-small is-centered breathe-horizontal" role="navigation" aria-label="pagination"> <a href="" class="pagination-previous is-invisible">Previous </a> <a href="/search/?searchtype=author&amp;query=Talwalkar%2C+A&amp;start=50" class="pagination-next" >Next </a> <ul class="pagination-list"> <li> <a href="/search/?searchtype=author&amp;query=Talwalkar%2C+A&amp;start=0" class="pagination-link is-current" aria-label="Goto page 1">1 </a> </li> <li> <a href="/search/?searchtype=author&amp;query=Talwalkar%2C+A&amp;start=50" class="pagination-link " aria-label="Page 2" aria-current="page">2 </a> </li> </ul> </nav> <ol class="breathe-horizontal" start="1"> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2411.02796">arXiv:2411.02796</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2411.02796">pdf</a>, <a href="https://arxiv.org/format/2411.02796">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Genomics">q-bio.GN</span> </div> </div> <p class="title is-5 mathjax"> Specialized Foundation Models Struggle to Beat Supervised Baselines </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Xu%2C+Z">Zongzhe Xu</a>, <a href="/search/cs?searchtype=author&amp;query=Gupta%2C+R">Ritvik Gupta</a>, <a href="/search/cs?searchtype=author&amp;query=Cheng%2C+W">Wenduo Cheng</a>, <a href="/search/cs?searchtype=author&amp;query=Shen%2C+A">Alexander Shen</a>, <a href="/search/cs?searchtype=author&amp;query=Shen%2C+J">Junhong Shen</a>, <a href="/search/cs?searchtype=author&amp;query=Talwalkar%2C+A">Ameet Talwalkar</a>, <a href="/search/cs?searchtype=author&amp;query=Khodak%2C+M">Mikhail Khodak</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2411.02796v1-abstract-short" style="display: inline;"> Following its success for vision and text, the &#34;foundation model&#34; (FM) paradigm -- pretraining large models on massive data, then fine-tuning on target tasks -- has rapidly expanded to domains in the sciences, engineering, healthcare, and beyond. Has this achieved what the original FMs accomplished, i.e. the supplanting of traditional supervised learning in their domains? To answer we look at thre&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.02796v1-abstract-full').style.display = 'inline'; document.getElementById('2411.02796v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2411.02796v1-abstract-full" style="display: none;"> Following its success for vision and text, the &#34;foundation model&#34; (FM) paradigm -- pretraining large models on massive data, then fine-tuning on target tasks -- has rapidly expanded to domains in the sciences, engineering, healthcare, and beyond. Has this achieved what the original FMs accomplished, i.e. the supplanting of traditional supervised learning in their domains? To answer we look at three modalities -- genomics, satellite imaging, and time series -- with multiple recent FMs and compare them to a standard supervised learning workflow: model development, hyperparameter tuning, and training, all using only data from the target task. Across these three specialized domains, we find that it is consistently possible to train simple supervised models -- no more complicated than a lightly modified wide ResNet or UNet -- that match or even outperform the latest foundation models. Our work demonstrates that the benefits of large-scale pretraining have yet to be realized in many specialized areas, reinforces the need to compare new FMs to strong, well-tuned baselines, and introduces two new, easy-to-use, open-source, and automated workflows for doing so. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.02796v1-abstract-full').style.display = 'none'; document.getElementById('2411.02796v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 4 November, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> November 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">The first two authors contributed equally. The order was determined by coin flip</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2410.24206">arXiv:2410.24206</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2410.24206">pdf</a>, <a href="https://arxiv.org/format/2410.24206">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Optimization and Control">math.OC</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">stat.ML</span> </div> </div> <p class="title is-5 mathjax"> Understanding Optimization in Deep Learning with Central Flows </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Cohen%2C+J+M">Jeremy M. Cohen</a>, <a href="/search/cs?searchtype=author&amp;query=Damian%2C+A">Alex Damian</a>, <a href="/search/cs?searchtype=author&amp;query=Talwalkar%2C+A">Ameet Talwalkar</a>, <a href="/search/cs?searchtype=author&amp;query=Kolter%2C+Z">Zico Kolter</a>, <a href="/search/cs?searchtype=author&amp;query=Lee%2C+J+D">Jason D. Lee</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2410.24206v1-abstract-short" style="display: inline;"> Optimization in deep learning remains poorly understood, even in the simple setting of deterministic (i.e. full-batch) training. A key difficulty is that much of an optimizer&#39;s behavior is implicitly determined by complex oscillatory dynamics, referred to as the &#34;edge of stability.&#34; The main contribution of this paper is to show that an optimizer&#39;s implicit behavior can be explicitly captured by a&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.24206v1-abstract-full').style.display = 'inline'; document.getElementById('2410.24206v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2410.24206v1-abstract-full" style="display: none;"> Optimization in deep learning remains poorly understood, even in the simple setting of deterministic (i.e. full-batch) training. A key difficulty is that much of an optimizer&#39;s behavior is implicitly determined by complex oscillatory dynamics, referred to as the &#34;edge of stability.&#34; The main contribution of this paper is to show that an optimizer&#39;s implicit behavior can be explicitly captured by a &#34;central flow:&#34; a differential equation which models the time-averaged optimization trajectory. We show that these flows can empirically predict long-term optimization trajectories of generic neural networks with a high degree of numerical accuracy. By interpreting these flows, we reveal for the first time 1) the precise sense in which RMSProp adapts to the local loss landscape, and 2) an &#34;acceleration via regularization&#34; mechanism, wherein adaptive optimizers implicitly navigate towards low-curvature regions in which they can take larger steps. This mechanism is key to the efficacy of these adaptive optimizers. Overall, we believe that central flows constitute a promising tool for reasoning about optimization in deep learning. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.24206v1-abstract-full').style.display = 'none'; document.getElementById('2410.24206v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 31 October, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">first two authors contributed equally; author order determined by coin flip</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2410.04596">arXiv:2410.04596</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2410.04596">pdf</a>, <a href="https://arxiv.org/format/2410.04596">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Human-Computer Interaction">cs.HC</span> </div> </div> <p class="title is-5 mathjax"> Need Help? Designing Proactive AI Assistants for Programming </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Chen%2C+V">Valerie Chen</a>, <a href="/search/cs?searchtype=author&amp;query=Zhu%2C+A">Alan Zhu</a>, <a href="/search/cs?searchtype=author&amp;query=Zhao%2C+S">Sebastian Zhao</a>, <a href="/search/cs?searchtype=author&amp;query=Mozannar%2C+H">Hussein Mozannar</a>, <a href="/search/cs?searchtype=author&amp;query=Sontag%2C+D">David Sontag</a>, <a href="/search/cs?searchtype=author&amp;query=Talwalkar%2C+A">Ameet Talwalkar</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2410.04596v1-abstract-short" style="display: inline;"> While current chat-based AI assistants primarily operate reactively, responding only when prompted by users, there is significant potential for these systems to proactively assist in tasks without explicit invocation, enabling a mixed-initiative interaction. This work explores the design and implementation of proactive AI assistants powered by large language models. We first outline the key design&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.04596v1-abstract-full').style.display = 'inline'; document.getElementById('2410.04596v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2410.04596v1-abstract-full" style="display: none;"> While current chat-based AI assistants primarily operate reactively, responding only when prompted by users, there is significant potential for these systems to proactively assist in tasks without explicit invocation, enabling a mixed-initiative interaction. This work explores the design and implementation of proactive AI assistants powered by large language models. We first outline the key design considerations for building effective proactive assistants. As a case study, we propose a proactive chat-based programming assistant that automatically provides suggestions and facilitates their integration into the programmer&#39;s code. The programming context provides a shared workspace enabling the assistant to offer more relevant suggestions. We conducted a randomized experimental study examining the impact of various design elements of the proactive assistant on programmer productivity and user experience. Our findings reveal significant benefits of incorporating proactive chat assistants into coding environments and uncover important nuances that influence their usage and effectiveness. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.04596v1-abstract-full').style.display = 'none'; document.getElementById('2410.04596v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 6 October, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2409.12089">arXiv:2409.12089</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2409.12089">pdf</a>, <a href="https://arxiv.org/format/2409.12089">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> The Impact of Element Ordering on LM Agent Performance </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Chi%2C+W">Wayne Chi</a>, <a href="/search/cs?searchtype=author&amp;query=Talwalkar%2C+A">Ameet Talwalkar</a>, <a href="/search/cs?searchtype=author&amp;query=Donahue%2C+C">Chris Donahue</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2409.12089v3-abstract-short" style="display: inline;"> There has been a surge of interest in language model agents that can navigate virtual environments such as the web or desktop. To navigate such environments, agents benefit from information on the various elements (e.g., buttons, text, or images) present. It remains unclear which element attributes have the greatest impact on agent performance, especially in environments that only provide a graphi&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2409.12089v3-abstract-full').style.display = 'inline'; document.getElementById('2409.12089v3-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2409.12089v3-abstract-full" style="display: none;"> There has been a surge of interest in language model agents that can navigate virtual environments such as the web or desktop. To navigate such environments, agents benefit from information on the various elements (e.g., buttons, text, or images) present. It remains unclear which element attributes have the greatest impact on agent performance, especially in environments that only provide a graphical representation (i.e., pixels). Here we find that the ordering in which elements are presented to the language model is surprisingly impactful--randomizing element ordering in a webpage degrades agent performance comparably to removing all visible text from an agent&#39;s state representation. While a webpage provides a hierarchical ordering of elements, there is no such ordering when parsing elements directly from pixels. Moreover, as tasks become more challenging and models more sophisticated, our experiments suggest that the impact of ordering increases. Finding an effective ordering is non-trivial. We investigate the impact of various element ordering methods in web and desktop environments. We find that dimensionality reduction provides a viable ordering for pixel-only environments. We train a UI element detection model to derive elements from pixels and apply our findings to an agent benchmark--OmniACT--where we only have access to pixels. Our method completes more than two times as many tasks on average relative to the previous state-of-the-art. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2409.12089v3-abstract-full').style.display = 'none'; document.getElementById('2409.12089v3-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 6 October, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 18 September, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> September 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2407.12804">arXiv:2407.12804</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2407.12804">pdf</a>, <a href="https://arxiv.org/format/2407.12804">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Human-Computer Interaction">cs.HC</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> Modulating Language Model Experiences through Frictions </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Collins%2C+K+M">Katherine M. Collins</a>, <a href="/search/cs?searchtype=author&amp;query=Chen%2C+V">Valerie Chen</a>, <a href="/search/cs?searchtype=author&amp;query=Sucholutsky%2C+I">Ilia Sucholutsky</a>, <a href="/search/cs?searchtype=author&amp;query=Kirk%2C+H+R">Hannah Rose Kirk</a>, <a href="/search/cs?searchtype=author&amp;query=Sadek%2C+M">Malak Sadek</a>, <a href="/search/cs?searchtype=author&amp;query=Sargeant%2C+H">Holli Sargeant</a>, <a href="/search/cs?searchtype=author&amp;query=Talwalkar%2C+A">Ameet Talwalkar</a>, <a href="/search/cs?searchtype=author&amp;query=Weller%2C+A">Adrian Weller</a>, <a href="/search/cs?searchtype=author&amp;query=Bhatt%2C+U">Umang Bhatt</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2407.12804v2-abstract-short" style="display: inline;"> Language models are transforming the ways that their users engage with the world. Despite impressive capabilities, over-consumption of language model outputs risks propagating unchecked errors in the short-term and damaging human capabilities for critical thinking in the long-term. How can we develop scaffolding around language models to curate more appropriate use? We propose selective frictions&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2407.12804v2-abstract-full').style.display = 'inline'; document.getElementById('2407.12804v2-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2407.12804v2-abstract-full" style="display: none;"> Language models are transforming the ways that their users engage with the world. Despite impressive capabilities, over-consumption of language model outputs risks propagating unchecked errors in the short-term and damaging human capabilities for critical thinking in the long-term. How can we develop scaffolding around language models to curate more appropriate use? We propose selective frictions for language model experiences, inspired by behavioral science interventions, to dampen misuse. Frictions involve small modifications to a user&#39;s experience, e.g., the addition of a button impeding model access and reminding a user of their expertise relative to the model. Through a user study with real humans, we observe shifts in user behavior from the imposition of a friction over LLMs in the context of a multi-topic question-answering task as a representative task that people may use LLMs for, e.g., in education and information retrieval. We find that frictions modulate over-reliance by driving down users&#39; click rates while minimally affecting accuracy for those topics. Yet, frictions may have unintended effects. We find marked differences in users&#39; click behaviors even on topics where frictions were not provisioned. Our contributions motivate further study of human-AI behavioral interaction to inform more effective and appropriate LLM use. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2407.12804v2-abstract-full').style.display = 'none'; document.getElementById('2407.12804v2-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 18 November, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 24 June, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> July 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">NeurIPS Workshop on Behavioral ML; non-archival</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2407.02348">arXiv:2407.02348</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2407.02348">pdf</a>, <a href="https://arxiv.org/format/2407.02348">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> Revisiting Cascaded Ensembles for Efficient Inference </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Kolawole%2C+S">Steven Kolawole</a>, <a href="/search/cs?searchtype=author&amp;query=Dennis%2C+D">Don Dennis</a>, <a href="/search/cs?searchtype=author&amp;query=Talwalkar%2C+A">Ameet Talwalkar</a>, <a href="/search/cs?searchtype=author&amp;query=Smith%2C+V">Virginia Smith</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2407.02348v1-abstract-short" style="display: inline;"> A common approach to make machine learning inference more efficient is to use example-specific adaptive schemes, which route or select models for each example at inference time. In this work we study a simple scheme for adaptive inference. We build a cascade of ensembles (CoE), beginning with resource-efficient models and growing to larger, more expressive models, where ensemble agreement serves a&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2407.02348v1-abstract-full').style.display = 'inline'; document.getElementById('2407.02348v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2407.02348v1-abstract-full" style="display: none;"> A common approach to make machine learning inference more efficient is to use example-specific adaptive schemes, which route or select models for each example at inference time. In this work we study a simple scheme for adaptive inference. We build a cascade of ensembles (CoE), beginning with resource-efficient models and growing to larger, more expressive models, where ensemble agreement serves as a data-dependent routing criterion. This scheme is easy to incorporate into existing inference pipelines, requires no additional training, and can be used to place models across multiple resource tiers--for instance, serving efficient models at the edge and invoking larger models in the cloud only when necessary. In cases where parallel inference is feasible, we show that CoE can improve accuracy relative to the single best model while reducing the average cost of inference by up to 7x, and provides Pareto-dominate solutions in accuracy and efficiency relative to existing adaptive inference baselines. These savings translate to an over 3x-reduction in total monetary cost when performing inference using a heterogeneous cluster of GPUs. Finally, for edge inference scenarios where portions of the cascade reside at the edge vs. in the cloud, CoE can provide a 14x reduction in communication cost and inference latency without sacrificing accuracy. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2407.02348v1-abstract-full').style.display = 'none'; document.getElementById('2407.02348v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 2 July, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> July 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">ES-FOMO, ICML 2024</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2404.02806">arXiv:2404.02806</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2404.02806">pdf</a>, <a href="https://arxiv.org/format/2404.02806">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Software Engineering">cs.SE</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Human-Computer Interaction">cs.HC</span> </div> </div> <p class="title is-5 mathjax"> The RealHumanEval: Evaluating Large Language Models&#39; Abilities to Support Programmers </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Mozannar%2C+H">Hussein Mozannar</a>, <a href="/search/cs?searchtype=author&amp;query=Chen%2C+V">Valerie Chen</a>, <a href="/search/cs?searchtype=author&amp;query=Alsobay%2C+M">Mohammed Alsobay</a>, <a href="/search/cs?searchtype=author&amp;query=Das%2C+S">Subhro Das</a>, <a href="/search/cs?searchtype=author&amp;query=Zhao%2C+S">Sebastian Zhao</a>, <a href="/search/cs?searchtype=author&amp;query=Wei%2C+D">Dennis Wei</a>, <a href="/search/cs?searchtype=author&amp;query=Nagireddy%2C+M">Manish Nagireddy</a>, <a href="/search/cs?searchtype=author&amp;query=Sattigeri%2C+P">Prasanna Sattigeri</a>, <a href="/search/cs?searchtype=author&amp;query=Talwalkar%2C+A">Ameet Talwalkar</a>, <a href="/search/cs?searchtype=author&amp;query=Sontag%2C+D">David Sontag</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2404.02806v2-abstract-short" style="display: inline;"> Evaluation of large language models for code has primarily relied on static benchmarks, including HumanEval (Chen et al., 2021), or more recently using human preferences of LLM responses. As LLMs are increasingly used as programmer assistants, we study whether gains on existing benchmarks or more preferred LLM responses translate to programmer productivity when coding with LLMs, including time spe&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2404.02806v2-abstract-full').style.display = 'inline'; document.getElementById('2404.02806v2-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2404.02806v2-abstract-full" style="display: none;"> Evaluation of large language models for code has primarily relied on static benchmarks, including HumanEval (Chen et al., 2021), or more recently using human preferences of LLM responses. As LLMs are increasingly used as programmer assistants, we study whether gains on existing benchmarks or more preferred LLM responses translate to programmer productivity when coding with LLMs, including time spent coding. We introduce RealHumanEval, a web interface to measure the ability of LLMs to assist programmers, through either autocomplete or chat support. We conducted a user study (N=243) using RealHumanEval in which users interacted with seven LLMs of varying base model performance. Despite static benchmarks not incorporating humans-in-the-loop, we find that improvements in benchmark performance lead to increased programmer productivity; however gaps in benchmark versus human performance are not proportional -- a trend that holds across both forms of LLM support. In contrast, we find that programmer preferences do not correlate with their actual performance, motivating the need for better proxy signals. We open-source RealHumanEval to enable human-centric evaluation of new models and the study data to facilitate efforts to improve code models. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2404.02806v2-abstract-full').style.display = 'none'; document.getElementById('2404.02806v2-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 14 October, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 3 April, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> April 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2403.07187">arXiv:2403.07187</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2403.07187">pdf</a>, <a href="https://arxiv.org/format/2403.07187">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> UPS: Efficiently Building Foundation Models for PDE Solving via Cross-Modal Adaptation </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Shen%2C+J">Junhong Shen</a>, <a href="/search/cs?searchtype=author&amp;query=Marwah%2C+T">Tanya Marwah</a>, <a href="/search/cs?searchtype=author&amp;query=Talwalkar%2C+A">Ameet Talwalkar</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2403.07187v3-abstract-short" style="display: inline;"> We present Unified PDE Solvers (UPS), a data- and compute-efficient approach to developing unified neural operators for diverse families of spatiotemporal PDEs from various domains, dimensions, and resolutions. UPS embeds different PDEs into a shared representation space and processes them using a FNO-transformer architecture. Rather than training the network from scratch, which is data-demanding&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2403.07187v3-abstract-full').style.display = 'inline'; document.getElementById('2403.07187v3-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2403.07187v3-abstract-full" style="display: none;"> We present Unified PDE Solvers (UPS), a data- and compute-efficient approach to developing unified neural operators for diverse families of spatiotemporal PDEs from various domains, dimensions, and resolutions. UPS embeds different PDEs into a shared representation space and processes them using a FNO-transformer architecture. Rather than training the network from scratch, which is data-demanding and computationally expensive, we warm-start the transformer from pretrained LLMs and perform explicit alignment to reduce the modality gap while improving data and compute efficiency. The cross-modal UPS achieves state-of-the-art results on a wide range of 1D and 2D PDE families from PDEBench, outperforming existing unified models using 4 times less data and 26 times less compute. Meanwhile, it is capable of few-shot transfer to unseen PDE families and coefficients. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2403.07187v3-abstract-full').style.display = 'none'; document.getElementById('2403.07187v3-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 30 July, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 11 March, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> March 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">ICML 2024 AI for Science Workshop (Spotlight)</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2402.05406">arXiv:2402.05406</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2402.05406">pdf</a>, <a href="https://arxiv.org/format/2402.05406">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> </div> </div> <p class="title is-5 mathjax"> Everybody Prune Now: Structured Pruning of LLMs with only Forward Passes </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Dery%2C+L">Lucio Dery</a>, <a href="/search/cs?searchtype=author&amp;query=Kolawole%2C+S">Steven Kolawole</a>, <a href="/search/cs?searchtype=author&amp;query=Kagy%2C+J">Jean-Fran莽ois Kagy</a>, <a href="/search/cs?searchtype=author&amp;query=Smith%2C+V">Virginia Smith</a>, <a href="/search/cs?searchtype=author&amp;query=Neubig%2C+G">Graham Neubig</a>, <a href="/search/cs?searchtype=author&amp;query=Talwalkar%2C+A">Ameet Talwalkar</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2402.05406v2-abstract-short" style="display: inline;"> Given the generational gap in available hardware between lay practitioners and the most endowed institutions, LLMs are becoming increasingly inaccessible as they grow in size. Whilst many approaches have been proposed to compress LLMs to make their resource consumption manageable, these methods themselves tend to be resource intensive, putting them out of the reach of the very user groups they tar&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2402.05406v2-abstract-full').style.display = 'inline'; document.getElementById('2402.05406v2-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2402.05406v2-abstract-full" style="display: none;"> Given the generational gap in available hardware between lay practitioners and the most endowed institutions, LLMs are becoming increasingly inaccessible as they grow in size. Whilst many approaches have been proposed to compress LLMs to make their resource consumption manageable, these methods themselves tend to be resource intensive, putting them out of the reach of the very user groups they target. In this work, we explore the problem of structured pruning of LLMs using only forward passes. We seek to empower practitioners to prune models so large that their available hardware has just enough memory to run inference. We develop Bonsai, a gradient-free, perturbative pruning method capable of delivering small, fast, and accurate pruned models. We observe that Bonsai outputs pruned models that (i) outperform those generated by more expensive gradient-based structured pruning methods, and (ii) are twice as fast (with comparable accuracy) as those generated by semi-structured pruning methods requiring comparable resources as Bonsai. We also leverage Bonsai to produce a new sub-2B model using a single A6000 that yields state-of-the-art performance on 4/6 tasks on the Huggingface Open LLM leaderboard. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2402.05406v2-abstract-full').style.display = 'none'; document.getElementById('2402.05406v2-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 9 February, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 7 February, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">15 pages, 4 fiigures, 15 tables</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2312.03151">arXiv:2312.03151</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2312.03151">pdf</a>, <a href="https://arxiv.org/format/2312.03151">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> Multitask Learning Can Improve Worst-Group Outcomes </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Kulkarni%2C+A">Atharva Kulkarni</a>, <a href="/search/cs?searchtype=author&amp;query=Dery%2C+L">Lucio Dery</a>, <a href="/search/cs?searchtype=author&amp;query=Setlur%2C+A">Amrith Setlur</a>, <a href="/search/cs?searchtype=author&amp;query=Raghunathan%2C+A">Aditi Raghunathan</a>, <a href="/search/cs?searchtype=author&amp;query=Talwalkar%2C+A">Ameet Talwalkar</a>, <a href="/search/cs?searchtype=author&amp;query=Neubig%2C+G">Graham Neubig</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2312.03151v2-abstract-short" style="display: inline;"> In order to create machine learning systems that serve a variety of users well, it is vital to not only achieve high average performance but also ensure equitable outcomes across diverse groups. However, most machine learning methods are designed to improve a model&#39;s average performance on a chosen end task without consideration for their impact on worst group error. Multitask learning (MTL) is on&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2312.03151v2-abstract-full').style.display = 'inline'; document.getElementById('2312.03151v2-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2312.03151v2-abstract-full" style="display: none;"> In order to create machine learning systems that serve a variety of users well, it is vital to not only achieve high average performance but also ensure equitable outcomes across diverse groups. However, most machine learning methods are designed to improve a model&#39;s average performance on a chosen end task without consideration for their impact on worst group error. Multitask learning (MTL) is one such widely used technique. In this paper, we seek not only to understand the impact of MTL on worst-group accuracy but also to explore its potential as a tool to address the challenge of group-wise fairness. We primarily consider the standard setting of fine-tuning a pre-trained model, where, following recent work \citep{gururangan2020don, dery2023aang}, we multitask the end task with the pre-training objective constructed from the end task data itself. In settings with few or no group annotations, we find that multitasking often, but not consistently, achieves better worst-group accuracy than Just-Train-Twice (JTT; \citet{pmlr-v139-liu21f}) -- a representative distributionally robust optimization (DRO) method. Leveraging insights from synthetic data experiments, we propose to modify standard MTL by regularizing the joint multitask representation space. We run a large number of fine-tuning experiments across computer vision and natural language processing datasets and find that our regularized MTL approach \emph{consistently} outperforms JTT on both average and worst-group outcomes. Our official code can be found here: \href{https://github.com/atharvajk98/MTL-group-robustness.git}{\url{https://github.com/atharvajk98/MTL-group-robustness}}. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2312.03151v2-abstract-full').style.display = 'none'; document.getElementById('2312.03151v2-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 28 February, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 5 December, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> December 2023. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">20 pages, 7 tables, 6 Figures</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2311.04076">arXiv:2311.04076</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2311.04076">pdf</a>, <a href="https://arxiv.org/format/2311.04076">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> </div> </div> <p class="title is-5 mathjax"> Do LLMs exhibit human-like response biases? A case study in survey design </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Tjuatja%2C+L">Lindia Tjuatja</a>, <a href="/search/cs?searchtype=author&amp;query=Chen%2C+V">Valerie Chen</a>, <a href="/search/cs?searchtype=author&amp;query=Wu%2C+S+T">Sherry Tongshuang Wu</a>, <a href="/search/cs?searchtype=author&amp;query=Talwalkar%2C+A">Ameet Talwalkar</a>, <a href="/search/cs?searchtype=author&amp;query=Neubig%2C+G">Graham Neubig</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2311.04076v5-abstract-short" style="display: inline;"> As large language models (LLMs) become more capable, there is growing excitement about the possibility of using LLMs as proxies for humans in real-world tasks where subjective labels are desired, such as in surveys and opinion polling. One widely-cited barrier to the adoption of LLMs as proxies for humans in subjective tasks is their sensitivity to prompt wording - but interestingly, humans also d&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2311.04076v5-abstract-full').style.display = 'inline'; document.getElementById('2311.04076v5-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2311.04076v5-abstract-full" style="display: none;"> As large language models (LLMs) become more capable, there is growing excitement about the possibility of using LLMs as proxies for humans in real-world tasks where subjective labels are desired, such as in surveys and opinion polling. One widely-cited barrier to the adoption of LLMs as proxies for humans in subjective tasks is their sensitivity to prompt wording - but interestingly, humans also display sensitivities to instruction changes in the form of response biases. We investigate the extent to which LLMs reflect human response biases, if at all. We look to survey design, where human response biases caused by changes in the wordings of &#34;prompts&#34; have been extensively explored in social psychology literature. Drawing from these works, we design a dataset and framework to evaluate whether LLMs exhibit human-like response biases in survey questionnaires. Our comprehensive evaluation of nine models shows that popular open and commercial LLMs generally fail to reflect human-like behavior, particularly in models that have undergone RLHF. Furthermore, even if a model shows a significant change in the same direction as humans, we find that they are sensitive to perturbations that do not elicit significant changes in humans. These results highlight the pitfalls of using LLMs as human proxies, and underscore the need for finer-grained characterizations of model behavior. Our code, dataset, and collected samples are available at https://github.com/lindiatjuatja/BiasMonkey <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2311.04076v5-abstract-full').style.display = 'none'; document.getElementById('2311.04076v5-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 5 February, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 7 November, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> November 2023. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2310.02246">arXiv:2310.02246</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2310.02246">pdf</a>, <a href="https://arxiv.org/format/2310.02246">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Numerical Analysis">math.NA</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">stat.ML</span> </div> </div> <p class="title is-5 mathjax"> Learning to Relax: Setting Solver Parameters Across a Sequence of Linear System Instances </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Khodak%2C+M">Mikhail Khodak</a>, <a href="/search/cs?searchtype=author&amp;query=Chow%2C+E">Edmond Chow</a>, <a href="/search/cs?searchtype=author&amp;query=Balcan%2C+M">Maria-Florina Balcan</a>, <a href="/search/cs?searchtype=author&amp;query=Talwalkar%2C+A">Ameet Talwalkar</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2310.02246v2-abstract-short" style="display: inline;"> Solving a linear system $Ax=b$ is a fundamental scientific computing primitive for which numerous solvers and preconditioners have been developed. These come with parameters whose optimal values depend on the system being solved and are often impossible or too expensive to identify; thus in practice sub-optimal heuristics are used. We consider the common setting in which many related linear system&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2310.02246v2-abstract-full').style.display = 'inline'; document.getElementById('2310.02246v2-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2310.02246v2-abstract-full" style="display: none;"> Solving a linear system $Ax=b$ is a fundamental scientific computing primitive for which numerous solvers and preconditioners have been developed. These come with parameters whose optimal values depend on the system being solved and are often impossible or too expensive to identify; thus in practice sub-optimal heuristics are used. We consider the common setting in which many related linear systems need to be solved, e.g. during a single numerical simulation. In this scenario, can we sequentially choose parameters that attain a near-optimal overall number of iterations, without extra matrix computations? We answer in the affirmative for Successive Over-Relaxation (SOR), a standard solver whose parameter $蠅$ has a strong impact on its runtime. For this method, we prove that a bandit online learning algorithm--using only the number of iterations as feedback--can select parameters for a sequence of instances such that the overall cost approaches that of the best fixed $蠅$ as the sequence length increases. Furthermore, when given additional structural information, we show that a contextual bandit method asymptotically achieves the performance of the instance-optimal policy, which selects the best $蠅$ for each instance. Our work provides the first learning-theoretic treatment of high-precision linear system solvers and the first end-to-end guarantees for data-driven scientific computing, demonstrating theoretically the potential to speed up numerical methods using well-understood learning algorithms. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2310.02246v2-abstract-full').style.display = 'none'; document.getElementById('2310.02246v2-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 2 May, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 3 October, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2023. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">ICLR 2024 Spotlight</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2307.15475">arXiv:2307.15475</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2307.15475">pdf</a>, <a href="https://arxiv.org/format/2307.15475">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Human-Computer Interaction">cs.HC</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> FeedbackLogs: Recording and Incorporating Stakeholder Feedback into Machine Learning Pipelines </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Barker%2C+M">Matthew Barker</a>, <a href="/search/cs?searchtype=author&amp;query=Kallina%2C+E">Emma Kallina</a>, <a href="/search/cs?searchtype=author&amp;query=Ashok%2C+D">Dhananjay Ashok</a>, <a href="/search/cs?searchtype=author&amp;query=Collins%2C+K+M">Katherine M. Collins</a>, <a href="/search/cs?searchtype=author&amp;query=Casovan%2C+A">Ashley Casovan</a>, <a href="/search/cs?searchtype=author&amp;query=Weller%2C+A">Adrian Weller</a>, <a href="/search/cs?searchtype=author&amp;query=Talwalkar%2C+A">Ameet Talwalkar</a>, <a href="/search/cs?searchtype=author&amp;query=Chen%2C+V">Valerie Chen</a>, <a href="/search/cs?searchtype=author&amp;query=Bhatt%2C+U">Umang Bhatt</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2307.15475v1-abstract-short" style="display: inline;"> Even though machine learning (ML) pipelines affect an increasing array of stakeholders, there is little work on how input from stakeholders is recorded and incorporated. We propose FeedbackLogs, addenda to existing documentation of ML pipelines, to track the input of multiple stakeholders. Each log records important details about the feedback collection process, the feedback itself, and how the fe&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2307.15475v1-abstract-full').style.display = 'inline'; document.getElementById('2307.15475v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2307.15475v1-abstract-full" style="display: none;"> Even though machine learning (ML) pipelines affect an increasing array of stakeholders, there is little work on how input from stakeholders is recorded and incorporated. We propose FeedbackLogs, addenda to existing documentation of ML pipelines, to track the input of multiple stakeholders. Each log records important details about the feedback collection process, the feedback itself, and how the feedback is used to update the ML pipeline. In this paper, we introduce and formalise a process for collecting a FeedbackLog. We also provide concrete use cases where FeedbackLogs can be employed as evidence for algorithmic auditing and as a tool to record updates based on stakeholder feedback. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2307.15475v1-abstract-full').style.display = 'none'; document.getElementById('2307.15475v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 28 July, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> July 2023. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2306.08167">arXiv:2306.08167</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2306.08167">pdf</a>, <a href="https://arxiv.org/format/2306.08167">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Human-Computer Interaction">cs.HC</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> <div class="is-inline-block" style="margin-left: 0.5rem"> <div class="tags has-addons"> <span class="tag is-dark is-size-7">doi</span> <span class="tag is-light is-size-7"><a class="" href="https://doi.org/10.1609/hcomp.v11i1.27548">10.1609/hcomp.v11i1.27548 <i class="fa fa-external-link" aria-hidden="true"></i></a></span> </div> </div> </div> <p class="title is-5 mathjax"> Where Does My Model Underperform? A Human Evaluation of Slice Discovery Algorithms </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Johnson%2C+N">Nari Johnson</a>, <a href="/search/cs?searchtype=author&amp;query=Cabrera%2C+%C3%81+A">脕ngel Alexander Cabrera</a>, <a href="/search/cs?searchtype=author&amp;query=Plumb%2C+G">Gregory Plumb</a>, <a href="/search/cs?searchtype=author&amp;query=Talwalkar%2C+A">Ameet Talwalkar</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2306.08167v2-abstract-short" style="display: inline;"> Machine learning (ML) models that achieve high average accuracy can still underperform on semantically coherent subsets (&#34;slices&#34;) of data. This behavior can have significant societal consequences for the safety or bias of the model in deployment, but identifying these underperforming slices can be difficult in practice, especially in domains where practitioners lack access to group annotations to&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2306.08167v2-abstract-full').style.display = 'inline'; document.getElementById('2306.08167v2-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2306.08167v2-abstract-full" style="display: none;"> Machine learning (ML) models that achieve high average accuracy can still underperform on semantically coherent subsets (&#34;slices&#34;) of data. This behavior can have significant societal consequences for the safety or bias of the model in deployment, but identifying these underperforming slices can be difficult in practice, especially in domains where practitioners lack access to group annotations to define coherent subsets of their data. Motivated by these challenges, ML researchers have developed new slice discovery algorithms that aim to group together coherent and high-error subsets of data. However, there has been little evaluation focused on whether these tools help humans form correct hypotheses about where (for which groups) their model underperforms. We conduct a controlled user study (N = 15) where we show 40 slices output by two state-of-the-art slice discovery algorithms to users, and ask them to form hypotheses about an object detection model. Our results provide positive evidence that these tools provide some benefit over a naive baseline, and also shed light on challenges faced by users during the hypothesis formation step. We conclude by discussing design opportunities for ML and HCI researchers. Our findings point to the importance of centering users when creating and evaluating new tools for slice discovery. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2306.08167v2-abstract-full').style.display = 'none'; document.getElementById('2306.08167v2-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 9 February, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 13 June, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> June 2023. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Proceedings of the AAAI Conference on Human Computation and Crowdsourcing, 11(1), 65-76. Best Paper Award</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2304.06701">arXiv:2304.06701</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2304.06701">pdf</a>, <a href="https://arxiv.org/format/2304.06701">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computers and Society">cs.CY</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Human-Computer Interaction">cs.HC</span> </div> </div> <p class="title is-5 mathjax"> Learning Personalized Decision Support Policies </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Bhatt%2C+U">Umang Bhatt</a>, <a href="/search/cs?searchtype=author&amp;query=Chen%2C+V">Valerie Chen</a>, <a href="/search/cs?searchtype=author&amp;query=Collins%2C+K+M">Katherine M. Collins</a>, <a href="/search/cs?searchtype=author&amp;query=Kamalaruban%2C+P">Parameswaran Kamalaruban</a>, <a href="/search/cs?searchtype=author&amp;query=Kallina%2C+E">Emma Kallina</a>, <a href="/search/cs?searchtype=author&amp;query=Weller%2C+A">Adrian Weller</a>, <a href="/search/cs?searchtype=author&amp;query=Talwalkar%2C+A">Ameet Talwalkar</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2304.06701v2-abstract-short" style="display: inline;"> Individual human decision-makers may benefit from different forms of support to improve decision outcomes, but when each form of support will yield better outcomes? In this work, we posit that personalizing access to decision support tools can be an effective mechanism for instantiating the appropriate use of AI assistance. Specifically, we propose the general problem of learning a decision suppor&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2304.06701v2-abstract-full').style.display = 'inline'; document.getElementById('2304.06701v2-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2304.06701v2-abstract-full" style="display: none;"> Individual human decision-makers may benefit from different forms of support to improve decision outcomes, but when each form of support will yield better outcomes? In this work, we posit that personalizing access to decision support tools can be an effective mechanism for instantiating the appropriate use of AI assistance. Specifically, we propose the general problem of learning a decision support policy that, for a given input, chooses which form of support to provide to decision-makers for whom we initially have no prior information. We develop $\texttt{Modiste}$, an interactive tool to learn personalized decision support policies. $\texttt{Modiste}$ leverages stochastic contextual bandit techniques to personalize a decision support policy for each decision-maker and supports extensions to the multi-objective setting to account for auxiliary objectives like the cost of support. We find that personalized policies outperform offline policies, and, in the cost-aware setting, reduce the incurred cost with minimal degradation to performance. Our experiments include various realistic forms of support (e.g., expert consensus and predictions from a large language model) on vision and language tasks. Our human subject experiments validate our computational experiments, demonstrating that personalization can yield benefits in practice for real users, who interact with $\texttt{Modiste}$. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2304.06701v2-abstract-full').style.display = 'none'; document.getElementById('2304.06701v2-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 27 May, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 13 April, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> April 2023. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">29 pages, 12 figures</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2302.08450">arXiv:2302.08450</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2302.08450">pdf</a>, <a href="https://arxiv.org/format/2302.08450">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Human-Computer Interaction">cs.HC</span> </div> </div> <p class="title is-5 mathjax"> Assisting Human Decisions in Document Matching </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Kim%2C+J+S">Joon Sik Kim</a>, <a href="/search/cs?searchtype=author&amp;query=Chen%2C+V">Valerie Chen</a>, <a href="/search/cs?searchtype=author&amp;query=Pruthi%2C+D">Danish Pruthi</a>, <a href="/search/cs?searchtype=author&amp;query=Shah%2C+N+B">Nihar B. Shah</a>, <a href="/search/cs?searchtype=author&amp;query=Talwalkar%2C+A">Ameet Talwalkar</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2302.08450v1-abstract-short" style="display: inline;"> Many practical applications, ranging from paper-reviewer assignment in peer review to job-applicant matching for hiring, require human decision makers to identify relevant matches by combining their expertise with predictions from machine learning models. In many such model-assisted document matching tasks, the decision makers have stressed the need for assistive information about the model output&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2302.08450v1-abstract-full').style.display = 'inline'; document.getElementById('2302.08450v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2302.08450v1-abstract-full" style="display: none;"> Many practical applications, ranging from paper-reviewer assignment in peer review to job-applicant matching for hiring, require human decision makers to identify relevant matches by combining their expertise with predictions from machine learning models. In many such model-assisted document matching tasks, the decision makers have stressed the need for assistive information about the model outputs (or the data) to facilitate their decisions. In this paper, we devise a proxy matching task that allows us to evaluate which kinds of assistive information improve decision makers&#39; performance (in terms of accuracy and time). Through a crowdsourced (N=271 participants) study, we find that providing black-box model explanations reduces users&#39; accuracy on the matching task, contrary to the commonly-held belief that they can be helpful by allowing better understanding of the model. On the other hand, custom methods that are designed to closely attend to some task-specific desiderata are found to be effective in improving user performance. Surprisingly, we also find that the users&#39; perceived utility of assistive information is misaligned with their objective utility (measured through their task performance). <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2302.08450v1-abstract-full').style.display = 'none'; document.getElementById('2302.08450v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 16 February, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2023. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2302.05738">arXiv:2302.05738</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2302.05738">pdf</a>, <a href="https://arxiv.org/format/2302.05738">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> Cross-Modal Fine-Tuning: Align then Refine </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Shen%2C+J">Junhong Shen</a>, <a href="/search/cs?searchtype=author&amp;query=Li%2C+L">Liam Li</a>, <a href="/search/cs?searchtype=author&amp;query=Dery%2C+L+M">Lucio M. Dery</a>, <a href="/search/cs?searchtype=author&amp;query=Staten%2C+C">Corey Staten</a>, <a href="/search/cs?searchtype=author&amp;query=Khodak%2C+M">Mikhail Khodak</a>, <a href="/search/cs?searchtype=author&amp;query=Neubig%2C+G">Graham Neubig</a>, <a href="/search/cs?searchtype=author&amp;query=Talwalkar%2C+A">Ameet Talwalkar</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2302.05738v2-abstract-short" style="display: inline;"> Fine-tuning large-scale pretrained models has led to tremendous progress in well-studied modalities such as vision and NLP. However, similar gains have not been observed in many other modalities due to a lack of relevant pretrained models. In this work, we propose ORCA, a general cross-modal fine-tuning framework that extends the applicability of a single large-scale pretrained model to diverse mo&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2302.05738v2-abstract-full').style.display = 'inline'; document.getElementById('2302.05738v2-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2302.05738v2-abstract-full" style="display: none;"> Fine-tuning large-scale pretrained models has led to tremendous progress in well-studied modalities such as vision and NLP. However, similar gains have not been observed in many other modalities due to a lack of relevant pretrained models. In this work, we propose ORCA, a general cross-modal fine-tuning framework that extends the applicability of a single large-scale pretrained model to diverse modalities. ORCA adapts to a target task via an align-then-refine workflow: given the target input, ORCA first learns an embedding network that aligns the embedded feature distribution with the pretraining modality. The pretrained model is then fine-tuned on the embedded data to exploit the knowledge shared across modalities. Through extensive experiments, we show that ORCA obtains state-of-the-art results on 3 benchmarks containing over 60 datasets from 12 modalities, outperforming a wide range of hand-designed, AutoML, general-purpose, and task-specific methods. We highlight the importance of data alignment via a series of ablation studies and demonstrate ORCA&#39;s utility in data-limited regimes. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2302.05738v2-abstract-full').style.display = 'none'; document.getElementById('2302.05738v2-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 18 March, 2023; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 11 February, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2023. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2302.04732">arXiv:2302.04732</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2302.04732">pdf</a>, <a href="https://arxiv.org/format/2302.04732">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Human-Computer Interaction">cs.HC</span> </div> <div class="is-inline-block" style="margin-left: 0.5rem"> <div class="tags has-addons"> <span class="tag is-dark is-size-7">doi</span> <span class="tag is-light is-size-7"><a class="" href="https://doi.org/10.1145/3544548.3581268">10.1145/3544548.3581268 <i class="fa fa-external-link" aria-hidden="true"></i></a></span> </div> </div> </div> <p class="title is-5 mathjax"> Zeno: An Interactive Framework for Behavioral Evaluation of Machine Learning </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Cabrera%2C+%C3%81+A">脕ngel Alexander Cabrera</a>, <a href="/search/cs?searchtype=author&amp;query=Fu%2C+E">Erica Fu</a>, <a href="/search/cs?searchtype=author&amp;query=Bertucci%2C+D">Donald Bertucci</a>, <a href="/search/cs?searchtype=author&amp;query=Holstein%2C+K">Kenneth Holstein</a>, <a href="/search/cs?searchtype=author&amp;query=Talwalkar%2C+A">Ameet Talwalkar</a>, <a href="/search/cs?searchtype=author&amp;query=Hong%2C+J+I">Jason I. Hong</a>, <a href="/search/cs?searchtype=author&amp;query=Perer%2C+A">Adam Perer</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2302.04732v1-abstract-short" style="display: inline;"> Machine learning models with high accuracy on test data can still produce systematic failures, such as harmful biases and safety issues, when deployed in the real world. To detect and mitigate such failures, practitioners run behavioral evaluation of their models, checking model outputs for specific types of inputs. Behavioral evaluation is important but challenging, requiring that practitioners d&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2302.04732v1-abstract-full').style.display = 'inline'; document.getElementById('2302.04732v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2302.04732v1-abstract-full" style="display: none;"> Machine learning models with high accuracy on test data can still produce systematic failures, such as harmful biases and safety issues, when deployed in the real world. To detect and mitigate such failures, practitioners run behavioral evaluation of their models, checking model outputs for specific types of inputs. Behavioral evaluation is important but challenging, requiring that practitioners discover real-world patterns and validate systematic failures. We conducted 18 semi-structured interviews with ML practitioners to better understand the challenges of behavioral evaluation and found that it is a collaborative, use-case-first process that is not adequately supported by existing task- and domain-specific tools. Using these findings, we designed Zeno, a general-purpose framework for visualizing and testing AI systems across diverse use cases. In four case studies with participants using Zeno on real-world models, we found that practitioners were able to reproduce previous manual analyses and discover new systematic failures. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2302.04732v1-abstract-full').style.display = 'none'; document.getElementById('2302.04732v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 9 February, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2023. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2212.08930">arXiv:2212.08930</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2212.08930">pdf</a>, <a href="https://arxiv.org/format/2212.08930">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> On Noisy Evaluation in Federated Hyperparameter Tuning </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Kuo%2C+K">Kevin Kuo</a>, <a href="/search/cs?searchtype=author&amp;query=Thaker%2C+P">Pratiksha Thaker</a>, <a href="/search/cs?searchtype=author&amp;query=Khodak%2C+M">Mikhail Khodak</a>, <a href="/search/cs?searchtype=author&amp;query=Nguyen%2C+J">John Nguyen</a>, <a href="/search/cs?searchtype=author&amp;query=Jiang%2C+D">Daniel Jiang</a>, <a href="/search/cs?searchtype=author&amp;query=Talwalkar%2C+A">Ameet Talwalkar</a>, <a href="/search/cs?searchtype=author&amp;query=Smith%2C+V">Virginia Smith</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2212.08930v4-abstract-short" style="display: inline;"> Hyperparameter tuning is critical to the success of federated learning applications. Unfortunately, appropriately selecting hyperparameters is challenging in federated networks. Issues of scale, privacy, and heterogeneity introduce noise in the tuning process and make it difficult to evaluate the performance of various hyperparameters. In this work, we perform the first systematic study on the eff&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2212.08930v4-abstract-full').style.display = 'inline'; document.getElementById('2212.08930v4-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2212.08930v4-abstract-full" style="display: none;"> Hyperparameter tuning is critical to the success of federated learning applications. Unfortunately, appropriately selecting hyperparameters is challenging in federated networks. Issues of scale, privacy, and heterogeneity introduce noise in the tuning process and make it difficult to evaluate the performance of various hyperparameters. In this work, we perform the first systematic study on the effect of noisy evaluation in federated hyperparameter tuning. We first identify and rigorously explore key sources of noise, including client subsampling, data and systems heterogeneity, and data privacy. Surprisingly, our results indicate that even small amounts of noise can significantly impact tuning methods-reducing the performance of state-of-the-art approaches to that of naive baselines. To address noisy evaluation in such scenarios, we propose a simple and effective approach that leverages public proxy data to boost the evaluation signal. Our work establishes general challenges, baselines, and best practices for future work in federated hyperparameter tuning. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2212.08930v4-abstract-full').style.display = 'none'; document.getElementById('2212.08930v4-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 15 May, 2023; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 17 December, 2022; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> December 2022. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">v1: 19 pages, 15 figures, submitted to MLSys2023; v2: Fixed citation formatting; v3: Fixed typo, update acks v4: MLSys2023 camera-ready</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2210.03324">arXiv:2210.03324</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2210.03324">pdf</a>, <a href="https://arxiv.org/format/2210.03324">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">stat.ML</span> </div> </div> <p class="title is-5 mathjax"> AutoML for Climate Change: A Call to Action </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Tu%2C+R">Renbo Tu</a>, <a href="/search/cs?searchtype=author&amp;query=Roberts%2C+N">Nicholas Roberts</a>, <a href="/search/cs?searchtype=author&amp;query=Prasad%2C+V">Vishak Prasad</a>, <a href="/search/cs?searchtype=author&amp;query=Nayak%2C+S">Sibasis Nayak</a>, <a href="/search/cs?searchtype=author&amp;query=Jain%2C+P">Paarth Jain</a>, <a href="/search/cs?searchtype=author&amp;query=Sala%2C+F">Frederic Sala</a>, <a href="/search/cs?searchtype=author&amp;query=Ramakrishnan%2C+G">Ganesh Ramakrishnan</a>, <a href="/search/cs?searchtype=author&amp;query=Talwalkar%2C+A">Ameet Talwalkar</a>, <a href="/search/cs?searchtype=author&amp;query=Neiswanger%2C+W">Willie Neiswanger</a>, <a href="/search/cs?searchtype=author&amp;query=White%2C+C">Colin White</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2210.03324v1-abstract-short" style="display: inline;"> The challenge that climate change poses to humanity has spurred a rapidly developing field of artificial intelligence research focused on climate change applications. The climate change AI (CCAI) community works on a diverse, challenging set of problems which often involve physics-constrained ML or heterogeneous spatiotemporal data. It would be desirable to use automated machine learning (AutoML)&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2210.03324v1-abstract-full').style.display = 'inline'; document.getElementById('2210.03324v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2210.03324v1-abstract-full" style="display: none;"> The challenge that climate change poses to humanity has spurred a rapidly developing field of artificial intelligence research focused on climate change applications. The climate change AI (CCAI) community works on a diverse, challenging set of problems which often involve physics-constrained ML or heterogeneous spatiotemporal data. It would be desirable to use automated machine learning (AutoML) techniques to automatically find high-performing architectures and hyperparameters for a given dataset. In this work, we benchmark popular AutoML libraries on three high-leverage CCAI applications: climate modeling, wind power forecasting, and catalyst discovery. We find that out-of-the-box AutoML libraries currently fail to meaningfully surpass the performance of human-designed CCAI models. However, we also identify a few key weaknesses, which stem from the fact that most AutoML techniques are tailored to computer vision and NLP applications. For example, while dozens of search spaces have been designed for image and language data, none have been designed for spatiotemporal data. Addressing these key weaknesses can lead to the discovery of novel architectures that yield substantial performance gains across numerous CCAI applications. Therefore, we present a call to action to the AutoML community, since there are a number of concrete, promising directions for future work in the space of AutoML for CCAI. We release our code and a list of resources at https://github.com/climate-change-automl/climate-change-automl. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2210.03324v1-abstract-full').style.display = 'none'; document.getElementById('2210.03324v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 7 October, 2022; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2022. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2208.12218">arXiv:2208.12218</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2208.12218">pdf</a>, <a href="https://arxiv.org/format/2208.12218">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> SONAR: Joint Architecture and System Optimization Search </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=J%C3%A4%C3%A4saari%2C+E">Elias J盲盲saari</a>, <a href="/search/cs?searchtype=author&amp;query=Ma%2C+M">Michelle Ma</a>, <a href="/search/cs?searchtype=author&amp;query=Talwalkar%2C+A">Ameet Talwalkar</a>, <a href="/search/cs?searchtype=author&amp;query=Chen%2C+T">Tianqi Chen</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2208.12218v1-abstract-short" style="display: inline;"> There is a growing need to deploy machine learning for different tasks on a wide array of new hardware platforms. Such deployment scenarios require tackling multiple challenges, including identifying a model architecture that can achieve a suitable predictive accuracy (architecture search), and finding an efficient implementation of the model to satisfy underlying hardware-specific systems constra&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2208.12218v1-abstract-full').style.display = 'inline'; document.getElementById('2208.12218v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2208.12218v1-abstract-full" style="display: none;"> There is a growing need to deploy machine learning for different tasks on a wide array of new hardware platforms. Such deployment scenarios require tackling multiple challenges, including identifying a model architecture that can achieve a suitable predictive accuracy (architecture search), and finding an efficient implementation of the model to satisfy underlying hardware-specific systems constraints such as latency (system optimization search). Existing works treat architecture search and system optimization search as separate problems and solve them sequentially. In this paper, we instead propose to solve these problems jointly, and introduce a simple but effective baseline method called SONAR that interleaves these two search problems. SONAR aims to efficiently optimize for predictive accuracy and inference latency by applying early stopping to both search processes. Our experiments on multiple different hardware back-ends show that SONAR identifies nearly optimal architectures 30 times faster than a brute force approach. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2208.12218v1-abstract-full').style.display = 'none'; document.getElementById('2208.12218v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 25 August, 2022; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> August 2022. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2207.10199">arXiv:2207.10199</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2207.10199">pdf</a>, <a href="https://arxiv.org/format/2207.10199">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">stat.ML</span> </div> </div> <p class="title is-5 mathjax"> Provably tuning the ElasticNet across instances </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Balcan%2C+M">Maria-Florina Balcan</a>, <a href="/search/cs?searchtype=author&amp;query=Khodak%2C+M">Mikhail Khodak</a>, <a href="/search/cs?searchtype=author&amp;query=Sharma%2C+D">Dravyansh Sharma</a>, <a href="/search/cs?searchtype=author&amp;query=Talwalkar%2C+A">Ameet Talwalkar</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2207.10199v2-abstract-short" style="display: inline;"> An important unresolved challenge in the theory of regularization is to set the regularization coefficients of popular techniques like the ElasticNet with general provable guarantees. We consider the problem of tuning the regularization parameters of Ridge regression, LASSO, and the ElasticNet across multiple problem instances, a setting that encompasses both cross-validation and multi-task hyperp&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2207.10199v2-abstract-full').style.display = 'inline'; document.getElementById('2207.10199v2-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2207.10199v2-abstract-full" style="display: none;"> An important unresolved challenge in the theory of regularization is to set the regularization coefficients of popular techniques like the ElasticNet with general provable guarantees. We consider the problem of tuning the regularization parameters of Ridge regression, LASSO, and the ElasticNet across multiple problem instances, a setting that encompasses both cross-validation and multi-task hyperparameter optimization. We obtain a novel structural result for the ElasticNet which characterizes the loss as a function of the tuning parameters as a piecewise-rational function with algebraic boundaries. We use this to bound the structural complexity of the regularized loss functions and show generalization guarantees for tuning the ElasticNet regression coefficients in the statistical setting. We also consider the more challenging online learning setting, where we show vanishing average expected regret relative to the optimal parameter pair. We further extend our results to tuning classification algorithms obtained by thresholding regression fits regularized by Ridge, LASSO, or ElasticNet. Our results are the first general learning-theoretic guarantees for this important class of problems that avoid strong assumptions on the data distribution. Furthermore, our guarantees hold for both validation and popular information criterion objectives. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2207.10199v2-abstract-full').style.display = 'none'; document.getElementById('2207.10199v2-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 15 January, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 20 July, 2022; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> July 2022. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2207.04104">arXiv:2207.04104</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2207.04104">pdf</a>, <a href="https://arxiv.org/format/2207.04104">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> Towards a More Rigorous Science of Blindspot Discovery in Image Classification Models </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Plumb%2C+G">Gregory Plumb</a>, <a href="/search/cs?searchtype=author&amp;query=Johnson%2C+N">Nari Johnson</a>, <a href="/search/cs?searchtype=author&amp;query=Cabrera%2C+%C3%81+A">脕ngel Alexander Cabrera</a>, <a href="/search/cs?searchtype=author&amp;query=Talwalkar%2C+A">Ameet Talwalkar</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2207.04104v3-abstract-short" style="display: inline;"> A growing body of work studies Blindspot Discovery Methods (&#34;BDM&#34;s): methods that use an image embedding to find semantically meaningful (i.e., united by a human-understandable concept) subsets of the data where an image classifier performs significantly worse. Motivated by observed gaps in prior work, we introduce a new framework for evaluating BDMs, SpotCheck, that uses synthetic image datasets&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2207.04104v3-abstract-full').style.display = 'inline'; document.getElementById('2207.04104v3-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2207.04104v3-abstract-full" style="display: none;"> A growing body of work studies Blindspot Discovery Methods (&#34;BDM&#34;s): methods that use an image embedding to find semantically meaningful (i.e., united by a human-understandable concept) subsets of the data where an image classifier performs significantly worse. Motivated by observed gaps in prior work, we introduce a new framework for evaluating BDMs, SpotCheck, that uses synthetic image datasets to train models with known blindspots and a new BDM, PlaneSpot, that uses a 2D image representation. We use SpotCheck to run controlled experiments that identify factors that influence BDM performance (e.g., the number of blindspots in a model, or features used to define the blindspot) and show that PlaneSpot is competitive with and in many cases outperforms existing BDMs. Importantly, we validate these findings by designing additional experiments that use real image data from MS-COCO, a large image benchmark dataset. Our findings suggest several promising directions for future work on BDM design and evaluation. Overall, we hope that the methodology and analyses presented in this work will help facilitate a more rigorous science of blindspot discovery. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2207.04104v3-abstract-full').style.display = 'none'; document.getElementById('2207.04104v3-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 11 July, 2023; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 8 July, 2022; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> July 2022. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">reviewed on OpenReview: https://openreview.net/forum?id=MaDvbLaBiF</span> </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Journal ref:</span> TMLR 2023 </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2206.13503">arXiv:2206.13503</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2206.13503">pdf</a>, <a href="https://arxiv.org/format/2206.13503">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Human-Computer Interaction">cs.HC</span> </div> </div> <p class="title is-5 mathjax"> On the Importance of Application-Grounded Experimental Design for Evaluating Explainable ML Methods </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Amarasinghe%2C+K">Kasun Amarasinghe</a>, <a href="/search/cs?searchtype=author&amp;query=Rodolfa%2C+K+T">Kit T. Rodolfa</a>, <a href="/search/cs?searchtype=author&amp;query=Jesus%2C+S">S茅rgio Jesus</a>, <a href="/search/cs?searchtype=author&amp;query=Chen%2C+V">Valerie Chen</a>, <a href="/search/cs?searchtype=author&amp;query=Balayan%2C+V">Vladimir Balayan</a>, <a href="/search/cs?searchtype=author&amp;query=Saleiro%2C+P">Pedro Saleiro</a>, <a href="/search/cs?searchtype=author&amp;query=Bizarro%2C+P">Pedro Bizarro</a>, <a href="/search/cs?searchtype=author&amp;query=Talwalkar%2C+A">Ameet Talwalkar</a>, <a href="/search/cs?searchtype=author&amp;query=Ghani%2C+R">Rayid Ghani</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2206.13503v4-abstract-short" style="display: inline;"> Most existing evaluations of explainable machine learning (ML) methods rely on simplifying assumptions or proxies that do not reflect real-world use cases; the handful of more robust evaluations on real-world settings have shortcomings in their design, resulting in limited conclusions of methods&#39; real-world utility. In this work, we seek to bridge this gap by conducting a study that evaluates thre&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2206.13503v4-abstract-full').style.display = 'inline'; document.getElementById('2206.13503v4-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2206.13503v4-abstract-full" style="display: none;"> Most existing evaluations of explainable machine learning (ML) methods rely on simplifying assumptions or proxies that do not reflect real-world use cases; the handful of more robust evaluations on real-world settings have shortcomings in their design, resulting in limited conclusions of methods&#39; real-world utility. In this work, we seek to bridge this gap by conducting a study that evaluates three popular explainable ML methods in a setting consistent with the intended deployment context. We build on a previous study on e-commerce fraud detection and make crucial modifications to its setup relaxing the simplifying assumptions made in the original work that departed from the deployment context. In doing so, we draw drastically different conclusions from the earlier work and find no evidence for the incremental utility of the tested methods in the task. Our results highlight how seemingly trivial experimental design choices can yield misleading conclusions, with lessons about the necessity of not only evaluating explainable ML methods using tasks, data, users, and metrics grounded in the intended deployment contexts but also developing methods tailored to specific applications. In addition, we believe the design of this experiment can serve as a template for future study designs evaluating explainable ML methods in other real-world contexts. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2206.13503v4-abstract-full').style.display = 'none'; document.getElementById('2206.13503v4-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 21 February, 2023; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 24 June, 2022; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> June 2022. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2206.02256">arXiv:2206.02256</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2206.02256">pdf</a>, <a href="https://arxiv.org/format/2206.02256">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Human-Computer Interaction">cs.HC</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> Use-Case-Grounded Simulations for Explanation Evaluation </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Chen%2C+V">Valerie Chen</a>, <a href="/search/cs?searchtype=author&amp;query=Johnson%2C+N">Nari Johnson</a>, <a href="/search/cs?searchtype=author&amp;query=Topin%2C+N">Nicholay Topin</a>, <a href="/search/cs?searchtype=author&amp;query=Plumb%2C+G">Gregory Plumb</a>, <a href="/search/cs?searchtype=author&amp;query=Talwalkar%2C+A">Ameet Talwalkar</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2206.02256v2-abstract-short" style="display: inline;"> A growing body of research runs human subject evaluations to study whether providing users with explanations of machine learning models can help them with practical real-world use cases. However, running user studies is challenging and costly, and consequently each study typically only evaluates a limited number of different settings, e.g., studies often only evaluate a few arbitrarily selected ex&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2206.02256v2-abstract-full').style.display = 'inline'; document.getElementById('2206.02256v2-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2206.02256v2-abstract-full" style="display: none;"> A growing body of research runs human subject evaluations to study whether providing users with explanations of machine learning models can help them with practical real-world use cases. However, running user studies is challenging and costly, and consequently each study typically only evaluates a limited number of different settings, e.g., studies often only evaluate a few arbitrarily selected explanation methods. To address these challenges and aid user study design, we introduce Use-Case-Grounded Simulated Evaluations (SimEvals). SimEvals involve training algorithmic agents that take as input the information content (such as model explanations) that would be presented to each participant in a human subject study, to predict answers to the use case of interest. The algorithmic agent&#39;s test set accuracy provides a measure of the predictiveness of the information content for the downstream use case. We run a comprehensive evaluation on three real-world use cases (forward simulation, model debugging, and counterfactual reasoning) to demonstrate that Simevals can effectively identify which explanation methods will help humans for each use case. These results provide evidence that SimEvals can be used to efficiently screen an important set of user study design decisions, e.g. selecting which explanations should be presented to the user, before running a potentially costly user study. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2206.02256v2-abstract-full').style.display = 'none'; document.getElementById('2206.02256v2-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 20 August, 2022; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 5 June, 2022; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> June 2022. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2205.14082">arXiv:2205.14082</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2205.14082">pdf</a>, <a href="https://arxiv.org/format/2205.14082">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> </div> </div> <p class="title is-5 mathjax"> AANG: Automating Auxiliary Learning </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Dery%2C+L+M">Lucio M. Dery</a>, <a href="/search/cs?searchtype=author&amp;query=Michel%2C+P">Paul Michel</a>, <a href="/search/cs?searchtype=author&amp;query=Khodak%2C+M">Mikhail Khodak</a>, <a href="/search/cs?searchtype=author&amp;query=Neubig%2C+G">Graham Neubig</a>, <a href="/search/cs?searchtype=author&amp;query=Talwalkar%2C+A">Ameet Talwalkar</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2205.14082v2-abstract-short" style="display: inline;"> Auxiliary objectives, supplementary learning signals that are introduced to help aid learning on data-starved or highly complex end-tasks, are commonplace in machine learning. Whilst much work has been done to formulate useful auxiliary objectives, their construction is still an art which proceeds by slow and tedious hand-design. Intuition for how and when these objectives improve end-task perform&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2205.14082v2-abstract-full').style.display = 'inline'; document.getElementById('2205.14082v2-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2205.14082v2-abstract-full" style="display: none;"> Auxiliary objectives, supplementary learning signals that are introduced to help aid learning on data-starved or highly complex end-tasks, are commonplace in machine learning. Whilst much work has been done to formulate useful auxiliary objectives, their construction is still an art which proceeds by slow and tedious hand-design. Intuition for how and when these objectives improve end-task performance has also had limited theoretical backing. In this work, we present an approach for automatically generating a suite of auxiliary objectives. We achieve this by deconstructing existing objectives within a novel unified taxonomy, identifying connections between them, and generating new ones based on the uncovered structure. Next, we theoretically formalize widely-held intuitions about how auxiliary learning improves generalization on the end-task. This leads us to a principled and efficient algorithm for searching the space of generated objectives to find those most useful to a specified end-task. With natural language processing (NLP) as our domain of study, we demonstrate that our automated auxiliary learning pipeline leads to strong improvements over competitive baselines across continued training experiments on a pre-trained model on 5 NLP tasks. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2205.14082v2-abstract-full').style.display = 'none'; document.getElementById('2205.14082v2-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 27 February, 2023; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 27 May, 2022; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> May 2022. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Accepted to ICLR 2023 22 pages, 7 tables and 5 figures</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2205.06905">arXiv:2205.06905</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2205.06905">pdf</a>, <a href="https://arxiv.org/format/2205.06905">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> Perspectives on Incorporating Expert Feedback into Model Updates </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Chen%2C+V">Valerie Chen</a>, <a href="/search/cs?searchtype=author&amp;query=Bhatt%2C+U">Umang Bhatt</a>, <a href="/search/cs?searchtype=author&amp;query=Heidari%2C+H">Hoda Heidari</a>, <a href="/search/cs?searchtype=author&amp;query=Weller%2C+A">Adrian Weller</a>, <a href="/search/cs?searchtype=author&amp;query=Talwalkar%2C+A">Ameet Talwalkar</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2205.06905v2-abstract-short" style="display: inline;"> Machine learning (ML) practitioners are increasingly tasked with developing models that are aligned with non-technical experts&#39; values and goals. However, there has been insufficient consideration on how practitioners should translate domain expertise into ML updates. In this paper, we consider how to capture interactions between practitioners and experts systematically. We devise a taxonomy to ma&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2205.06905v2-abstract-full').style.display = 'inline'; document.getElementById('2205.06905v2-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2205.06905v2-abstract-full" style="display: none;"> Machine learning (ML) practitioners are increasingly tasked with developing models that are aligned with non-technical experts&#39; values and goals. However, there has been insufficient consideration on how practitioners should translate domain expertise into ML updates. In this paper, we consider how to capture interactions between practitioners and experts systematically. We devise a taxonomy to match expert feedback types with practitioner updates. A practitioner may receive feedback from an expert at the observation- or domain-level, and convert this feedback into updates to the dataset, loss function, or parameter space. We review existing work from ML and human-computer interaction to describe this feedback-update taxonomy, and highlight the insufficient consideration given to incorporating feedback from non-technical experts. We end with a set of open questions that naturally arise from our proposed taxonomy and subsequent survey. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2205.06905v2-abstract-full').style.display = 'none'; document.getElementById('2205.06905v2-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 16 July, 2022; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 13 May, 2022; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> May 2022. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2204.07554">arXiv:2204.07554</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2204.07554">pdf</a>, <a href="https://arxiv.org/format/2204.07554">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> </div> </div> <p class="title is-5 mathjax"> Efficient Architecture Search for Diverse Tasks </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Shen%2C+J">Junhong Shen</a>, <a href="/search/cs?searchtype=author&amp;query=Khodak%2C+M">Mikhail Khodak</a>, <a href="/search/cs?searchtype=author&amp;query=Talwalkar%2C+A">Ameet Talwalkar</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2204.07554v3-abstract-short" style="display: inline;"> While neural architecture search (NAS) has enabled automated machine learning (AutoML) for well-researched areas, its application to tasks beyond computer vision is still under-explored. As less-studied domains are precisely those where we expect AutoML to have the greatest impact, in this work we study NAS for efficiently solving diverse problems. Seeking an approach that is fast, simple, and bro&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2204.07554v3-abstract-full').style.display = 'inline'; document.getElementById('2204.07554v3-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2204.07554v3-abstract-full" style="display: none;"> While neural architecture search (NAS) has enabled automated machine learning (AutoML) for well-researched areas, its application to tasks beyond computer vision is still under-explored. As less-studied domains are precisely those where we expect AutoML to have the greatest impact, in this work we study NAS for efficiently solving diverse problems. Seeking an approach that is fast, simple, and broadly applicable, we fix a standard convolutional network (CNN) topology and propose to search for the right kernel sizes and dilations its operations should take on. This dramatically expands the model&#39;s capacity to extract features at multiple resolutions for different types of data while only requiring search over the operation space. To overcome the efficiency challenges of naive weight-sharing in this search space, we introduce DASH, a differentiable NAS algorithm that computes the mixture-of-operations using the Fourier diagonalization of convolution, achieving both a better asymptotic complexity and an up-to-10x search time speedup in practice. We evaluate DASH on ten tasks spanning a variety of application domains such as PDE solving, protein folding, and heart disease detection. DASH outperforms state-of-the-art AutoML methods in aggregate, attaining the best-known automated performance on seven tasks. Meanwhile, on six of the ten tasks, the combined search and retraining time is less than 2x slower than simply training a CNN backbone that is far less accurate. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2204.07554v3-abstract-full').style.display = 'none'; document.getElementById('2204.07554v3-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 9 October, 2022; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 15 April, 2022; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> April 2022. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">NeurIPS 2022 Camera-Ready; code available at https://github.com/sjunhongshen/DASH</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2202.09312">arXiv:2202.09312</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2202.09312">pdf</a>, <a href="https://arxiv.org/format/2202.09312">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Data Structures and Algorithms">cs.DS</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">stat.ML</span> </div> </div> <p class="title is-5 mathjax"> Learning Predictions for Algorithms with Predictions </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Khodak%2C+M">Mikhail Khodak</a>, <a href="/search/cs?searchtype=author&amp;query=Balcan%2C+M">Maria-Florina Balcan</a>, <a href="/search/cs?searchtype=author&amp;query=Talwalkar%2C+A">Ameet Talwalkar</a>, <a href="/search/cs?searchtype=author&amp;query=Vassilvitskii%2C+S">Sergei Vassilvitskii</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2202.09312v2-abstract-short" style="display: inline;"> A burgeoning paradigm in algorithm design is the field of algorithms with predictions, in which algorithms can take advantage of a possibly-imperfect prediction of some aspect of the problem. While much work has focused on using predictions to improve competitive ratios, running times, or other performance measures, less effort has been devoted to the question of how to obtain the predictions them&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2202.09312v2-abstract-full').style.display = 'inline'; document.getElementById('2202.09312v2-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2202.09312v2-abstract-full" style="display: none;"> A burgeoning paradigm in algorithm design is the field of algorithms with predictions, in which algorithms can take advantage of a possibly-imperfect prediction of some aspect of the problem. While much work has focused on using predictions to improve competitive ratios, running times, or other performance measures, less effort has been devoted to the question of how to obtain the predictions themselves, especially in the critical online setting. We introduce a general design approach for algorithms that learn predictors: (1) identify a functional dependence of the performance measure on the prediction quality and (2) apply techniques from online learning to learn predictors, tune robustness-consistency trade-offs, and bound the sample complexity. We demonstrate the effectiveness of our approach by applying it to bipartite matching, ski-rental, page migration, and job scheduling. In several settings we improve upon multiple existing results while utilizing a much simpler analysis, while in the others we provide the first learning-theoretic guarantees. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2202.09312v2-abstract-full').style.display = 'none'; document.getElementById('2202.09312v2-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 17 October, 2022; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 18 February, 2022; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2022. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">NeurIPS 2022 camera-ready</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2112.06283">arXiv:2112.06283</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2112.06283">pdf</a>, <a href="https://arxiv.org/format/2112.06283">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Science and Game Theory">cs.GT</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> Bayesian Persuasion for Algorithmic Recourse </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Harris%2C+K">Keegan Harris</a>, <a href="/search/cs?searchtype=author&amp;query=Chen%2C+V">Valerie Chen</a>, <a href="/search/cs?searchtype=author&amp;query=Kim%2C+J+S">Joon Sik Kim</a>, <a href="/search/cs?searchtype=author&amp;query=Talwalkar%2C+A">Ameet Talwalkar</a>, <a href="/search/cs?searchtype=author&amp;query=Heidari%2C+H">Hoda Heidari</a>, <a href="/search/cs?searchtype=author&amp;query=Wu%2C+Z+S">Zhiwei Steven Wu</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2112.06283v3-abstract-short" style="display: inline;"> When subjected to automated decision-making, decision subjects may strategically modify their observable features in ways they believe will maximize their chances of receiving a favorable decision. In many practical situations, the underlying assessment rule is deliberately kept secret to avoid gaming and maintain competitive advantage. The resulting opacity forces the decision subjects to rely on&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2112.06283v3-abstract-full').style.display = 'inline'; document.getElementById('2112.06283v3-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2112.06283v3-abstract-full" style="display: none;"> When subjected to automated decision-making, decision subjects may strategically modify their observable features in ways they believe will maximize their chances of receiving a favorable decision. In many practical situations, the underlying assessment rule is deliberately kept secret to avoid gaming and maintain competitive advantage. The resulting opacity forces the decision subjects to rely on incomplete information when making strategic feature modifications. We capture such settings as a game of Bayesian persuasion, in which the decision maker offers a form of recourse to the decision subject by providing them with an action recommendation (or signal) to incentivize them to modify their features in desirable ways. We show that when using persuasion, the decision maker and decision subject are never worse off in expectation, while the decision maker can be significantly better off. While the decision maker&#39;s problem of finding the optimal Bayesian incentive-compatible (BIC) signaling policy takes the form of optimization over infinitely-many variables, we show that this optimization can be cast as a linear program over finitely-many regions of the space of possible assessment rules. While this reformulation simplifies the problem dramatically, solving the linear program requires reasoning about exponentially-many variables, even in relatively simple cases. Motivated by this observation, we provide a polynomial-time approximation scheme that recovers a near-optimal signaling policy. Finally, our numerical simulations on semi-synthetic data empirically demonstrate the benefits of using persuasion in the algorithmic recourse setting. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2112.06283v3-abstract-full').style.display = 'none'; document.getElementById('2112.06283v3-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 7 October, 2022; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 12 December, 2021; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> December 2021. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">In the thirty-sixth Conference on Neural Information Processing Systems (NeurIPS 2022)</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2110.05668">arXiv:2110.05668</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2110.05668">pdf</a>, <a href="https://arxiv.org/format/2110.05668">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> NAS-Bench-360: Benchmarking Neural Architecture Search on Diverse Tasks </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Tu%2C+R">Renbo Tu</a>, <a href="/search/cs?searchtype=author&amp;query=Roberts%2C+N">Nicholas Roberts</a>, <a href="/search/cs?searchtype=author&amp;query=Khodak%2C+M">Mikhail Khodak</a>, <a href="/search/cs?searchtype=author&amp;query=Shen%2C+J">Junhong Shen</a>, <a href="/search/cs?searchtype=author&amp;query=Sala%2C+F">Frederic Sala</a>, <a href="/search/cs?searchtype=author&amp;query=Talwalkar%2C+A">Ameet Talwalkar</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2110.05668v6-abstract-short" style="display: inline;"> Most existing neural architecture search (NAS) benchmarks and algorithms prioritize well-studied tasks, e.g. image classification on CIFAR or ImageNet. This makes the performance of NAS approaches in more diverse areas poorly understood. In this paper, we present NAS-Bench-360, a benchmark suite to evaluate methods on domains beyond those traditionally studied in architecture search, and use it to&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2110.05668v6-abstract-full').style.display = 'inline'; document.getElementById('2110.05668v6-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2110.05668v6-abstract-full" style="display: none;"> Most existing neural architecture search (NAS) benchmarks and algorithms prioritize well-studied tasks, e.g. image classification on CIFAR or ImageNet. This makes the performance of NAS approaches in more diverse areas poorly understood. In this paper, we present NAS-Bench-360, a benchmark suite to evaluate methods on domains beyond those traditionally studied in architecture search, and use it to address the following question: do state-of-the-art NAS methods perform well on diverse tasks? To construct the benchmark, we curate ten tasks spanning a diverse array of application domains, dataset sizes, problem dimensionalities, and learning objectives. Each task is carefully chosen to interoperate with modern CNN-based search methods while possibly being far-afield from its original development domain. To speed up and reduce the cost of NAS research, for two of the tasks we release the precomputed performance of 15,625 architectures comprising a standard CNN search space. Experimentally, we show the need for more robust NAS evaluation of the kind NAS-Bench-360 enables by showing that several modern NAS procedures perform inconsistently across the ten tasks, with many catastrophically poor results. We also demonstrate how NAS-Bench-360 and its associated precomputed results will enable future scientific discoveries by testing whether several recent hypotheses promoted in the NAS literature hold on diverse tasks. NAS-Bench-360 is hosted at https://nb360.ml.cmu.edu. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2110.05668v6-abstract-full').style.display = 'none'; document.getElementById('2110.05668v6-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 19 January, 2023; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 11 October, 2021; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2021. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">NeurIPS 2022 Datasets and Benchmarks Track</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2109.07437">arXiv:2109.07437</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2109.07437">pdf</a>, <a href="https://arxiv.org/format/2109.07437">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> </div> </div> <p class="title is-5 mathjax"> Should We Be Pre-training? An Argument for End-task Aware Training as an Alternative </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Dery%2C+L+M">Lucio M. Dery</a>, <a href="/search/cs?searchtype=author&amp;query=Michel%2C+P">Paul Michel</a>, <a href="/search/cs?searchtype=author&amp;query=Talwalkar%2C+A">Ameet Talwalkar</a>, <a href="/search/cs?searchtype=author&amp;query=Neubig%2C+G">Graham Neubig</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2109.07437v2-abstract-short" style="display: inline;"> In most settings of practical concern, machine learning practitioners know in advance what end-task they wish to boost with auxiliary tasks. However, widely used methods for leveraging auxiliary data like pre-training and its continued-pretraining variant are end-task agnostic: they rarely, if ever, exploit knowledge of the target task. We study replacing end-task agnostic continued training of pr&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2109.07437v2-abstract-full').style.display = 'inline'; document.getElementById('2109.07437v2-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2109.07437v2-abstract-full" style="display: none;"> In most settings of practical concern, machine learning practitioners know in advance what end-task they wish to boost with auxiliary tasks. However, widely used methods for leveraging auxiliary data like pre-training and its continued-pretraining variant are end-task agnostic: they rarely, if ever, exploit knowledge of the target task. We study replacing end-task agnostic continued training of pre-trained language models with end-task aware training of said models. We argue that for sufficiently important end-tasks, the benefits of leveraging auxiliary data in a task-aware fashion can justify forgoing the traditional approach of obtaining generic, end-task agnostic representations as with (continued) pre-training. On three different low-resource NLP tasks from two domains, we demonstrate that multi-tasking the end-task and auxiliary objectives results in significantly better downstream task performance than the widely-used task-agnostic continued pre-training paradigm of Gururangan et al. (2020). We next introduce an online meta-learning algorithm that learns a set of multi-task weights to better balance among our multiple auxiliary objectives, achieving further improvements on end-task performance and data efficiency. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2109.07437v2-abstract-full').style.display = 'none'; document.getElementById('2109.07437v2-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 6 February, 2022; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 15 September, 2021; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> September 2021. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">18 pages, 4 figures</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2108.08770">arXiv:2108.08770</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2108.08770">pdf</a>, <a href="https://arxiv.org/format/2108.08770">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> Learning-to-learn non-convex piecewise-Lipschitz functions </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Balcan%2C+M">Maria-Florina Balcan</a>, <a href="/search/cs?searchtype=author&amp;query=Khodak%2C+M">Mikhail Khodak</a>, <a href="/search/cs?searchtype=author&amp;query=Sharma%2C+D">Dravyansh Sharma</a>, <a href="/search/cs?searchtype=author&amp;query=Talwalkar%2C+A">Ameet Talwalkar</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2108.08770v1-abstract-short" style="display: inline;"> We analyze the meta-learning of the initialization and step-size of learning algorithms for piecewise-Lipschitz functions, a non-convex setting with applications to both machine learning and algorithms. Starting from recent regret bounds for the exponential forecaster on losses with dispersed discontinuities, we generalize them to be initialization-dependent and then use this result to propose a p&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2108.08770v1-abstract-full').style.display = 'inline'; document.getElementById('2108.08770v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2108.08770v1-abstract-full" style="display: none;"> We analyze the meta-learning of the initialization and step-size of learning algorithms for piecewise-Lipschitz functions, a non-convex setting with applications to both machine learning and algorithms. Starting from recent regret bounds for the exponential forecaster on losses with dispersed discontinuities, we generalize them to be initialization-dependent and then use this result to propose a practical meta-learning procedure that learns both the initialization and the step-size of the algorithm from multiple online learning tasks. Asymptotically, we guarantee that the average regret across tasks scales with a natural notion of task-similarity that measures the amount of overlap between near-optimal regions of different tasks. Finally, we instantiate the method and its guarantee in two important settings: robust meta-learning and multi-task data-driven algorithm design. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2108.08770v1-abstract-full').style.display = 'none'; document.getElementById('2108.08770v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 19 August, 2021; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> August 2021. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2107.06917">arXiv:2107.06917</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2107.06917">pdf</a>, <a href="https://arxiv.org/format/2107.06917">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> A Field Guide to Federated Optimization </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Wang%2C+J">Jianyu Wang</a>, <a href="/search/cs?searchtype=author&amp;query=Charles%2C+Z">Zachary Charles</a>, <a href="/search/cs?searchtype=author&amp;query=Xu%2C+Z">Zheng Xu</a>, <a href="/search/cs?searchtype=author&amp;query=Joshi%2C+G">Gauri Joshi</a>, <a href="/search/cs?searchtype=author&amp;query=McMahan%2C+H+B">H. Brendan McMahan</a>, <a href="/search/cs?searchtype=author&amp;query=Arcas%2C+B+A+y">Blaise Aguera y Arcas</a>, <a href="/search/cs?searchtype=author&amp;query=Al-Shedivat%2C+M">Maruan Al-Shedivat</a>, <a href="/search/cs?searchtype=author&amp;query=Andrew%2C+G">Galen Andrew</a>, <a href="/search/cs?searchtype=author&amp;query=Avestimehr%2C+S">Salman Avestimehr</a>, <a href="/search/cs?searchtype=author&amp;query=Daly%2C+K">Katharine Daly</a>, <a href="/search/cs?searchtype=author&amp;query=Data%2C+D">Deepesh Data</a>, <a href="/search/cs?searchtype=author&amp;query=Diggavi%2C+S">Suhas Diggavi</a>, <a href="/search/cs?searchtype=author&amp;query=Eichner%2C+H">Hubert Eichner</a>, <a href="/search/cs?searchtype=author&amp;query=Gadhikar%2C+A">Advait Gadhikar</a>, <a href="/search/cs?searchtype=author&amp;query=Garrett%2C+Z">Zachary Garrett</a>, <a href="/search/cs?searchtype=author&amp;query=Girgis%2C+A+M">Antonious M. Girgis</a>, <a href="/search/cs?searchtype=author&amp;query=Hanzely%2C+F">Filip Hanzely</a>, <a href="/search/cs?searchtype=author&amp;query=Hard%2C+A">Andrew Hard</a>, <a href="/search/cs?searchtype=author&amp;query=He%2C+C">Chaoyang He</a>, <a href="/search/cs?searchtype=author&amp;query=Horvath%2C+S">Samuel Horvath</a>, <a href="/search/cs?searchtype=author&amp;query=Huo%2C+Z">Zhouyuan Huo</a>, <a href="/search/cs?searchtype=author&amp;query=Ingerman%2C+A">Alex Ingerman</a>, <a href="/search/cs?searchtype=author&amp;query=Jaggi%2C+M">Martin Jaggi</a>, <a href="/search/cs?searchtype=author&amp;query=Javidi%2C+T">Tara Javidi</a>, <a href="/search/cs?searchtype=author&amp;query=Kairouz%2C+P">Peter Kairouz</a> , et al. (28 additional authors not shown) </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2107.06917v1-abstract-short" style="display: inline;"> Federated learning and analytics are a distributed approach for collaboratively learning models (or statistics) from decentralized data, motivated by and designed for privacy protection. The distributed learning process can be formulated as solving federated optimization problems, which emphasize communication efficiency, data heterogeneity, compatibility with privacy and system requirements, and&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2107.06917v1-abstract-full').style.display = 'inline'; document.getElementById('2107.06917v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2107.06917v1-abstract-full" style="display: none;"> Federated learning and analytics are a distributed approach for collaboratively learning models (or statistics) from decentralized data, motivated by and designed for privacy protection. The distributed learning process can be formulated as solving federated optimization problems, which emphasize communication efficiency, data heterogeneity, compatibility with privacy and system requirements, and other constraints that are not primary considerations in other problem settings. This paper provides recommendations and guidelines on formulating, designing, evaluating and analyzing federated optimization algorithms through concrete examples and practical implementation, with a focus on conducting effective simulations to infer real-world performance. The goal of this work is not to survey the current literature, but to inspire researchers and practitioners to design federated learning algorithms that can be used in various practical applications. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2107.06917v1-abstract-full').style.display = 'none'; document.getElementById('2107.06917v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 14 July, 2021; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> July 2021. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2106.04502">arXiv:2106.04502</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2106.04502">pdf</a>, <a href="https://arxiv.org/format/2106.04502">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Distributed, Parallel, and Cluster Computing">cs.DC</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">stat.ML</span> </div> </div> <p class="title is-5 mathjax"> Federated Hyperparameter Tuning: Challenges, Baselines, and Connections to Weight-Sharing </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Khodak%2C+M">Mikhail Khodak</a>, <a href="/search/cs?searchtype=author&amp;query=Tu%2C+R">Renbo Tu</a>, <a href="/search/cs?searchtype=author&amp;query=Li%2C+T">Tian Li</a>, <a href="/search/cs?searchtype=author&amp;query=Li%2C+L">Liam Li</a>, <a href="/search/cs?searchtype=author&amp;query=Balcan%2C+M">Maria-Florina Balcan</a>, <a href="/search/cs?searchtype=author&amp;query=Smith%2C+V">Virginia Smith</a>, <a href="/search/cs?searchtype=author&amp;query=Talwalkar%2C+A">Ameet Talwalkar</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2106.04502v2-abstract-short" style="display: inline;"> Tuning hyperparameters is a crucial but arduous part of the machine learning pipeline. Hyperparameter optimization is even more challenging in federated learning, where models are learned over a distributed network of heterogeneous devices; here, the need to keep data on device and perform local training makes it difficult to efficiently train and evaluate configurations. In this work, we investig&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2106.04502v2-abstract-full').style.display = 'inline'; document.getElementById('2106.04502v2-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2106.04502v2-abstract-full" style="display: none;"> Tuning hyperparameters is a crucial but arduous part of the machine learning pipeline. Hyperparameter optimization is even more challenging in federated learning, where models are learned over a distributed network of heterogeneous devices; here, the need to keep data on device and perform local training makes it difficult to efficiently train and evaluate configurations. In this work, we investigate the problem of federated hyperparameter tuning. We first identify key challenges and show how standard approaches may be adapted to form baselines for the federated setting. Then, by making a novel connection to the neural architecture search technique of weight-sharing, we introduce a new method, FedEx, to accelerate federated hyperparameter tuning that is applicable to widely-used federated optimization methods such as FedAvg and recent variants. Theoretically, we show that a FedEx variant correctly tunes the on-device learning rate in the setting of online convex optimization across devices. Empirically, we show that FedEx can outperform natural baselines for federated hyperparameter tuning by several percentage points on the Shakespeare, FEMNIST, and CIFAR-10 benchmarks, obtaining higher accuracy using the same training budget. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2106.04502v2-abstract-full').style.display = 'none'; document.getElementById('2106.04502v2-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 4 November, 2021; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 8 June, 2021; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> June 2021. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">NeurIPS 2021</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2106.02112">arXiv:2106.02112</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2106.02112">pdf</a>, <a href="https://arxiv.org/format/2106.02112">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> Finding and Fixing Spurious Patterns with Explanations </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Plumb%2C+G">Gregory Plumb</a>, <a href="/search/cs?searchtype=author&amp;query=Ribeiro%2C+M+T">Marco Tulio Ribeiro</a>, <a href="/search/cs?searchtype=author&amp;query=Talwalkar%2C+A">Ameet Talwalkar</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2106.02112v3-abstract-short" style="display: inline;"> Image classifiers often use spurious patterns, such as &#34;relying on the presence of a person to detect a tennis racket, which do not generalize. In this work, we present an end-to-end pipeline for identifying and mitigating spurious patterns for such models, under the assumption that we have access to pixel-wise object-annotations. We start by identifying patterns such as &#34;the model&#39;s prediction fo&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2106.02112v3-abstract-full').style.display = 'inline'; document.getElementById('2106.02112v3-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2106.02112v3-abstract-full" style="display: none;"> Image classifiers often use spurious patterns, such as &#34;relying on the presence of a person to detect a tennis racket, which do not generalize. In this work, we present an end-to-end pipeline for identifying and mitigating spurious patterns for such models, under the assumption that we have access to pixel-wise object-annotations. We start by identifying patterns such as &#34;the model&#39;s prediction for tennis racket changes 63% of the time if we hide the people.&#34; Then, if a pattern is spurious, we mitigate it via a novel form of data augmentation. We demonstrate that our method identifies a diverse set of spurious patterns and that it mitigates them by producing a model that is both more accurate on a distribution where the spurious pattern is not helpful and more robust to distribution shift. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2106.02112v3-abstract-full').style.display = 'none'; document.getElementById('2106.02112v3-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 17 August, 2022; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 3 June, 2021; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> June 2021. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2105.06506">arXiv:2105.06506</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2105.06506">pdf</a>, <a href="https://arxiv.org/format/2105.06506">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> Sanity Simulations for Saliency Methods </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Kim%2C+J+S">Joon Sik Kim</a>, <a href="/search/cs?searchtype=author&amp;query=Plumb%2C+G">Gregory Plumb</a>, <a href="/search/cs?searchtype=author&amp;query=Talwalkar%2C+A">Ameet Talwalkar</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2105.06506v3-abstract-short" style="display: inline;"> Saliency methods are a popular class of feature attribution explanation methods that aim to capture a model&#39;s predictive reasoning by identifying &#34;important&#34; pixels in an input image. However, the development and adoption of these methods are hindered by the lack of access to ground-truth model reasoning, which prevents accurate evaluation. In this work, we design a synthetic benchmarking framewor&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2105.06506v3-abstract-full').style.display = 'inline'; document.getElementById('2105.06506v3-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2105.06506v3-abstract-full" style="display: none;"> Saliency methods are a popular class of feature attribution explanation methods that aim to capture a model&#39;s predictive reasoning by identifying &#34;important&#34; pixels in an input image. However, the development and adoption of these methods are hindered by the lack of access to ground-truth model reasoning, which prevents accurate evaluation. In this work, we design a synthetic benchmarking framework, SMERF, that allows us to perform ground-truth-based evaluation while controlling the complexity of the model&#39;s reasoning. Experimentally, SMERF reveals significant limitations in existing saliency methods and, as a result, represents a useful tool for the development of new saliency methods. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2105.06506v3-abstract-full').style.display = 'none'; document.getElementById('2105.06506v3-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 16 June, 2022; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 13 May, 2021; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> May 2021. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Accepted to International Conference on Machine Learning (ICML 2022)</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2103.15798">arXiv:2103.15798</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2103.15798">pdf</a>, <a href="https://arxiv.org/format/2103.15798">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Numerical Analysis">math.NA</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">stat.ML</span> </div> </div> <p class="title is-5 mathjax"> Rethinking Neural Operations for Diverse Tasks </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Roberts%2C+N">Nicholas Roberts</a>, <a href="/search/cs?searchtype=author&amp;query=Khodak%2C+M">Mikhail Khodak</a>, <a href="/search/cs?searchtype=author&amp;query=Dao%2C+T">Tri Dao</a>, <a href="/search/cs?searchtype=author&amp;query=Li%2C+L">Liam Li</a>, <a href="/search/cs?searchtype=author&amp;query=R%C3%A9%2C+C">Christopher R茅</a>, <a href="/search/cs?searchtype=author&amp;query=Talwalkar%2C+A">Ameet Talwalkar</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2103.15798v2-abstract-short" style="display: inline;"> An important goal of AutoML is to automate-away the design of neural networks on new tasks in under-explored domains. Motivated by this goal, we study the problem of enabling users to discover the right neural operations given data from their specific domain. We introduce a search space of operations called XD-Operations that mimic the inductive bias of standard multi-channel convolutions while be&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2103.15798v2-abstract-full').style.display = 'inline'; document.getElementById('2103.15798v2-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2103.15798v2-abstract-full" style="display: none;"> An important goal of AutoML is to automate-away the design of neural networks on new tasks in under-explored domains. Motivated by this goal, we study the problem of enabling users to discover the right neural operations given data from their specific domain. We introduce a search space of operations called XD-Operations that mimic the inductive bias of standard multi-channel convolutions while being much more expressive: we prove that it includes many named operations across multiple application areas. Starting with any standard backbone such as ResNet, we show how to transform it into a search space over XD-operations and how to traverse the space using a simple weight-sharing scheme. On a diverse set of tasks -- solving PDEs, distance prediction for protein folding, and music modeling -- our approach consistently yields models with lower error than baseline networks and often even lower error than expert-designed domain-specific approaches. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2103.15798v2-abstract-full').style.display = 'none'; document.getElementById('2103.15798v2-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 4 November, 2021; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 29 March, 2021; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> March 2021. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">NeurIPS 2021</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2103.06254">arXiv:2103.06254</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2103.06254">pdf</a>, <a href="https://arxiv.org/format/2103.06254">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> Interpretable Machine Learning: Moving From Mythos to Diagnostics </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Chen%2C+V">Valerie Chen</a>, <a href="/search/cs?searchtype=author&amp;query=Li%2C+J">Jeffrey Li</a>, <a href="/search/cs?searchtype=author&amp;query=Kim%2C+J+S">Joon Sik Kim</a>, <a href="/search/cs?searchtype=author&amp;query=Plumb%2C+G">Gregory Plumb</a>, <a href="/search/cs?searchtype=author&amp;query=Talwalkar%2C+A">Ameet Talwalkar</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2103.06254v2-abstract-short" style="display: inline;"> Despite increasing interest in the field of Interpretable Machine Learning (IML), a significant gap persists between the technical objectives targeted by researchers&#39; methods and the high-level goals of consumers&#39; use cases. In this work, we synthesize foundational work on IML methods and evaluation into an actionable taxonomy. This taxonomy serves as a tool to conceptualize the gap between resear&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2103.06254v2-abstract-full').style.display = 'inline'; document.getElementById('2103.06254v2-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2103.06254v2-abstract-full" style="display: none;"> Despite increasing interest in the field of Interpretable Machine Learning (IML), a significant gap persists between the technical objectives targeted by researchers&#39; methods and the high-level goals of consumers&#39; use cases. In this work, we synthesize foundational work on IML methods and evaluation into an actionable taxonomy. This taxonomy serves as a tool to conceptualize the gap between researchers and consumers, illustrated by the lack of connections between its methods and use cases components. It also provides the foundation from which we describe a three-step workflow to better enable researchers and consumers to work together to discover what types of methods are useful for what use cases. Eventually, by building on the results generated from this workflow, a more complete version of the taxonomy will increasingly allow consumers to find relevant methods for their target use cases and researchers to identify applicable use cases for their proposed methods. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2103.06254v2-abstract-full').style.display = 'none'; document.getElementById('2103.06254v2-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 28 July, 2021; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 10 March, 2021; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> March 2021. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Presented at ICML HILL Workshop 2021</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2103.00065">arXiv:2103.00065</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2103.00065">pdf</a>, <a href="https://arxiv.org/format/2103.00065">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">stat.ML</span> </div> </div> <p class="title is-5 mathjax"> Gradient Descent on Neural Networks Typically Occurs at the Edge of Stability </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Cohen%2C+J+M">Jeremy M. Cohen</a>, <a href="/search/cs?searchtype=author&amp;query=Kaur%2C+S">Simran Kaur</a>, <a href="/search/cs?searchtype=author&amp;query=Li%2C+Y">Yuanzhi Li</a>, <a href="/search/cs?searchtype=author&amp;query=Kolter%2C+J+Z">J. Zico Kolter</a>, <a href="/search/cs?searchtype=author&amp;query=Talwalkar%2C+A">Ameet Talwalkar</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2103.00065v3-abstract-short" style="display: inline;"> We empirically demonstrate that full-batch gradient descent on neural network training objectives typically operates in a regime we call the Edge of Stability. In this regime, the maximum eigenvalue of the training loss Hessian hovers just above the numerical value $2 / \text{(step size)}$, and the training loss behaves non-monotonically over short timescales, yet consistently decreases over long&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2103.00065v3-abstract-full').style.display = 'inline'; document.getElementById('2103.00065v3-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2103.00065v3-abstract-full" style="display: none;"> We empirically demonstrate that full-batch gradient descent on neural network training objectives typically operates in a regime we call the Edge of Stability. In this regime, the maximum eigenvalue of the training loss Hessian hovers just above the numerical value $2 / \text{(step size)}$, and the training loss behaves non-monotonically over short timescales, yet consistently decreases over long timescales. Since this behavior is inconsistent with several widespread presumptions in the field of optimization, our findings raise questions as to whether these presumptions are relevant to neural network training. We hope that our findings will inspire future efforts aimed at rigorously understanding optimization at the Edge of Stability. Code is available at https://github.com/locuslab/edge-of-stability. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2103.00065v3-abstract-full').style.display = 'none'; document.getElementById('2103.00065v3-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 23 November, 2022; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 26 February, 2021; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> March 2021. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">ICLR 2021. v3 moves several figures from the appendix into the main text, and adds more discussion regarding Jastrz臋bski et al (2020): https://doi.org/10.48550/arXiv.2002.09572</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2102.00127">arXiv:2102.00127</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2102.00127">pdf</a>, <a href="https://arxiv.org/format/2102.00127">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">stat.ML</span> </div> </div> <p class="title is-5 mathjax"> On Data Efficiency of Meta-learning </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Al-Shedivat%2C+M">Maruan Al-Shedivat</a>, <a href="/search/cs?searchtype=author&amp;query=Li%2C+L">Liam Li</a>, <a href="/search/cs?searchtype=author&amp;query=Xing%2C+E">Eric Xing</a>, <a href="/search/cs?searchtype=author&amp;query=Talwalkar%2C+A">Ameet Talwalkar</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2102.00127v1-abstract-short" style="display: inline;"> Meta-learning has enabled learning statistical models that can be quickly adapted to new prediction tasks. Motivated by use-cases in personalized federated learning, we study the often overlooked aspect of the modern meta-learning algorithms -- their data efficiency. To shed more light on which methods are more efficient, we use techniques from algorithmic stability to derive bounds on the transfe&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2102.00127v1-abstract-full').style.display = 'inline'; document.getElementById('2102.00127v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2102.00127v1-abstract-full" style="display: none;"> Meta-learning has enabled learning statistical models that can be quickly adapted to new prediction tasks. Motivated by use-cases in personalized federated learning, we study the often overlooked aspect of the modern meta-learning algorithms -- their data efficiency. To shed more light on which methods are more efficient, we use techniques from algorithmic stability to derive bounds on the transfer risk that have important practical implications, indicating how much supervision is needed and how it must be allocated for each method to attain the desired level of generalization. Further, we introduce a new simple framework for evaluating meta-learning methods under a limit on the available supervision, conduct an empirical study of MAML, Reptile, and Protonets, and demonstrate the differences in the behavior of these methods on few-shot and federated learning benchmarks. Finally, we propose active meta-learning, which incorporates active data selection into learning-to-learn, leading to better performance of all methods in the limited supervision regime. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2102.00127v1-abstract-full').style.display = 'none'; document.getElementById('2102.00127v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 29 January, 2021; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2021. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Preliminary version. An updated version is to appear in AISTATS 2021</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2011.01205">arXiv:2011.01205</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2011.01205">pdf</a>, <a href="https://arxiv.org/format/2011.01205">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">stat.ML</span> </div> </div> <p class="title is-5 mathjax"> A Learning Theoretic Perspective on Local Explainability </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Li%2C+J">Jeffrey Li</a>, <a href="/search/cs?searchtype=author&amp;query=Nagarajan%2C+V">Vaishnavh Nagarajan</a>, <a href="/search/cs?searchtype=author&amp;query=Plumb%2C+G">Gregory Plumb</a>, <a href="/search/cs?searchtype=author&amp;query=Talwalkar%2C+A">Ameet Talwalkar</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2011.01205v1-abstract-short" style="display: inline;"> In this paper, we explore connections between interpretable machine learning and learning theory through the lens of local approximation explanations. First, we tackle the traditional problem of performance generalization and bound the test-time accuracy of a model using a notion of how locally explainable it is. Second, we explore the novel problem of explanation generalization which is an import&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2011.01205v1-abstract-full').style.display = 'inline'; document.getElementById('2011.01205v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2011.01205v1-abstract-full" style="display: none;"> In this paper, we explore connections between interpretable machine learning and learning theory through the lens of local approximation explanations. First, we tackle the traditional problem of performance generalization and bound the test-time accuracy of a model using a notion of how locally explainable it is. Second, we explore the novel problem of explanation generalization which is an important concern for a growing class of finite sample-based local approximation explanations. Finally, we validate our theoretical results empirically and show that they reflect what can be seen in practice. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2011.01205v1-abstract-full').style.display = 'none'; document.getElementById('2011.01205v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 2 November, 2020; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> November 2020. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2004.07802">arXiv:2004.07802</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2004.07802">pdf</a>, <a href="https://arxiv.org/format/2004.07802">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Neural and Evolutionary Computing">cs.NE</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Optimization and Control">math.OC</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">stat.ML</span> </div> </div> <p class="title is-5 mathjax"> Geometry-Aware Gradient Algorithms for Neural Architecture Search </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Li%2C+L">Liam Li</a>, <a href="/search/cs?searchtype=author&amp;query=Khodak%2C+M">Mikhail Khodak</a>, <a href="/search/cs?searchtype=author&amp;query=Balcan%2C+M">Maria-Florina Balcan</a>, <a href="/search/cs?searchtype=author&amp;query=Talwalkar%2C+A">Ameet Talwalkar</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2004.07802v5-abstract-short" style="display: inline;"> Recent state-of-the-art methods for neural architecture search (NAS) exploit gradient-based optimization by relaxing the problem into continuous optimization over architectures and shared-weights, a noisy process that remains poorly understood. We argue for the study of single-level empirical risk minimization to understand NAS with weight-sharing, reducing the design of NAS methods to devising op&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2004.07802v5-abstract-full').style.display = 'inline'; document.getElementById('2004.07802v5-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2004.07802v5-abstract-full" style="display: none;"> Recent state-of-the-art methods for neural architecture search (NAS) exploit gradient-based optimization by relaxing the problem into continuous optimization over architectures and shared-weights, a noisy process that remains poorly understood. We argue for the study of single-level empirical risk minimization to understand NAS with weight-sharing, reducing the design of NAS methods to devising optimizers and regularizers that can quickly obtain high-quality solutions to this problem. Invoking the theory of mirror descent, we present a geometry-aware framework that exploits the underlying structure of this optimization to return sparse architectural parameters, leading to simple yet novel algorithms that enjoy fast convergence guarantees and achieve state-of-the-art accuracy on the latest NAS benchmarks in computer vision. Notably, we exceed the best published results for both CIFAR and ImageNet on both the DARTS search space and NAS-Bench201; on the latter we achieve near-oracle-optimal performance on CIFAR-10 and CIFAR-100. Together, our theory and experiments demonstrate a principled way to co-design optimizers and continuous relaxations of discrete NAS search spaces. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2004.07802v5-abstract-full').style.display = 'none'; document.getElementById('2004.07802v5-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 18 March, 2021; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 16 April, 2020; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> April 2020. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">ICLR 2021 Camera-Ready</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2004.03424">arXiv:2004.03424</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2004.03424">pdf</a>, <a href="https://arxiv.org/format/2004.03424">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computers and Society">cs.CY</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">stat.ML</span> </div> </div> <p class="title is-5 mathjax"> FACT: A Diagnostic for Group Fairness Trade-offs </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Kim%2C+J+S">Joon Sik Kim</a>, <a href="/search/cs?searchtype=author&amp;query=Chen%2C+J">Jiahao Chen</a>, <a href="/search/cs?searchtype=author&amp;query=Talwalkar%2C+A">Ameet Talwalkar</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2004.03424v3-abstract-short" style="display: inline;"> Group fairness, a class of fairness notions that measure how different groups of individuals are treated differently according to their protected attributes, has been shown to conflict with one another, often with a necessary cost in loss of model&#39;s predictive performance. We propose a general diagnostic that enables systematic characterization of these trade-offs in group fairness. We observe tha&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2004.03424v3-abstract-full').style.display = 'inline'; document.getElementById('2004.03424v3-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2004.03424v3-abstract-full" style="display: none;"> Group fairness, a class of fairness notions that measure how different groups of individuals are treated differently according to their protected attributes, has been shown to conflict with one another, often with a necessary cost in loss of model&#39;s predictive performance. We propose a general diagnostic that enables systematic characterization of these trade-offs in group fairness. We observe that the majority of group fairness notions can be expressed via the fairness-confusion tensor, which is the confusion matrix split according to the protected attribute values. We frame several optimization problems that directly optimize both accuracy and fairness objectives over the elements of this tensor, which yield a general perspective for understanding multiple trade-offs including group fairness incompatibilities. It also suggests an alternate post-processing method for designing fair classifiers. On synthetic and real datasets, we demonstrate the use cases of our diagnostic, particularly on understanding the trade-off landscape between accuracy and fairness. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2004.03424v3-abstract-full').style.display = 'none'; document.getElementById('2004.03424v3-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 7 July, 2020; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 7 April, 2020; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> April 2020. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Accepted to International Conference on Machine Learning (ICML 2020)</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2003.01640">arXiv:2003.01640</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2003.01640">pdf</a>, <a href="https://arxiv.org/format/2003.01640">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">stat.ML</span> </div> </div> <p class="title is-5 mathjax"> Explaining Groups of Points in Low-Dimensional Representations </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Plumb%2C+G">Gregory Plumb</a>, <a href="/search/cs?searchtype=author&amp;query=Terhorst%2C+J">Jonathan Terhorst</a>, <a href="/search/cs?searchtype=author&amp;query=Sankararaman%2C+S">Sriram Sankararaman</a>, <a href="/search/cs?searchtype=author&amp;query=Talwalkar%2C+A">Ameet Talwalkar</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2003.01640v3-abstract-short" style="display: inline;"> A common workflow in data exploration is to learn a low-dimensional representation of the data, identify groups of points in that representation, and examine the differences between the groups to determine what they represent. We treat this workflow as an interpretable machine learning problem by leveraging the model that learned the low-dimensional representation to help identify the key differen&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2003.01640v3-abstract-full').style.display = 'inline'; document.getElementById('2003.01640v3-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2003.01640v3-abstract-full" style="display: none;"> A common workflow in data exploration is to learn a low-dimensional representation of the data, identify groups of points in that representation, and examine the differences between the groups to determine what they represent. We treat this workflow as an interpretable machine learning problem by leveraging the model that learned the low-dimensional representation to help identify the key differences between the groups. To solve this problem, we introduce a new type of explanation, a Global Counterfactual Explanation (GCE), and our algorithm, Transitive Global Translations (TGT), for computing GCEs. TGT identifies the differences between each pair of groups using compressed sensing but constrains those pairwise differences to be consistent among all of the groups. Empirically, we demonstrate that TGT is able to identify explanations that accurately explain the model while being relatively sparse, and that these explanations match real patterns in the data. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2003.01640v3-abstract-full').style.display = 'none'; document.getElementById('2003.01640v3-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 14 August, 2020; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 3 March, 2020; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> March 2020. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2001.01920">arXiv:2001.01920</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2001.01920">pdf</a>, <a href="https://arxiv.org/format/2001.01920">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">stat.ML</span> </div> </div> <p class="title is-5 mathjax"> FedDANE: A Federated Newton-Type Method </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Li%2C+T">Tian Li</a>, <a href="/search/cs?searchtype=author&amp;query=Sahu%2C+A+K">Anit Kumar Sahu</a>, <a href="/search/cs?searchtype=author&amp;query=Zaheer%2C+M">Manzil Zaheer</a>, <a href="/search/cs?searchtype=author&amp;query=Sanjabi%2C+M">Maziar Sanjabi</a>, <a href="/search/cs?searchtype=author&amp;query=Talwalkar%2C+A">Ameet Talwalkar</a>, <a href="/search/cs?searchtype=author&amp;query=Smith%2C+V">Virginia Smith</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2001.01920v1-abstract-short" style="display: inline;"> Federated learning aims to jointly learn statistical models over massively distributed remote devices. In this work, we propose FedDANE, an optimization method that we adapt from DANE, a method for classical distributed optimization, to handle the practical constraints of federated learning. We provide convergence guarantees for this method when learning over both convex and non-convex functions.&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2001.01920v1-abstract-full').style.display = 'inline'; document.getElementById('2001.01920v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2001.01920v1-abstract-full" style="display: none;"> Federated learning aims to jointly learn statistical models over massively distributed remote devices. In this work, we propose FedDANE, an optimization method that we adapt from DANE, a method for classical distributed optimization, to handle the practical constraints of federated learning. We provide convergence guarantees for this method when learning over both convex and non-convex functions. Despite encouraging theoretical results, we find that the method has underwhelming performance empirically. In particular, through empirical simulations on both synthetic and real-world datasets, FedDANE consistently underperforms baselines of FedAvg and FedProx in realistic federated settings. We identify low device participation and statistical device heterogeneity as two underlying causes of this underwhelming performance, and conclude by suggesting several directions of future work. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2001.01920v1-abstract-full').style.display = 'none'; document.getElementById('2001.01920v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 7 January, 2020; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> January 2020. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Asilomar Conference on Signals, Systems, and Computers 2019</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/1909.05830">arXiv:1909.05830</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/1909.05830">pdf</a>, <a href="https://arxiv.org/format/1909.05830">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Cryptography and Security">cs.CR</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">stat.ML</span> </div> </div> <p class="title is-5 mathjax"> Differentially Private Meta-Learning </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Li%2C+J">Jeffrey Li</a>, <a href="/search/cs?searchtype=author&amp;query=Khodak%2C+M">Mikhail Khodak</a>, <a href="/search/cs?searchtype=author&amp;query=Caldas%2C+S">Sebastian Caldas</a>, <a href="/search/cs?searchtype=author&amp;query=Talwalkar%2C+A">Ameet Talwalkar</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="1909.05830v2-abstract-short" style="display: inline;"> Parameter-transfer is a well-known and versatile approach for meta-learning, with applications including few-shot learning, federated learning, and reinforcement learning. However, parameter-transfer algorithms often require sharing models that have been trained on the samples from specific tasks, thus leaving the task-owners susceptible to breaches of privacy. We conduct the first formal study of&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('1909.05830v2-abstract-full').style.display = 'inline'; document.getElementById('1909.05830v2-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="1909.05830v2-abstract-full" style="display: none;"> Parameter-transfer is a well-known and versatile approach for meta-learning, with applications including few-shot learning, federated learning, and reinforcement learning. However, parameter-transfer algorithms often require sharing models that have been trained on the samples from specific tasks, thus leaving the task-owners susceptible to breaches of privacy. We conduct the first formal study of privacy in this setting and formalize the notion of task-global differential privacy as a practical relaxation of more commonly studied threat models. We then propose a new differentially private algorithm for gradient-based parameter transfer that not only satisfies this privacy requirement but also retains provable transfer learning guarantees in convex settings. Empirically, we apply our analysis to the problems of federated learning with personalization and few-shot classification, showing that allowing the relaxation to task-global privacy from the more commonly studied notion of local privacy leads to dramatically increased performance in recurrent neural language modeling and image classification. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('1909.05830v2-abstract-full').style.display = 'none'; document.getElementById('1909.05830v2-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 21 February, 2020; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 12 September, 2019; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> September 2019. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/1908.07873">arXiv:1908.07873</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/1908.07873">pdf</a>, <a href="https://arxiv.org/format/1908.07873">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Distributed, Parallel, and Cluster Computing">cs.DC</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">stat.ML</span> </div> <div class="is-inline-block" style="margin-left: 0.5rem"> <div class="tags has-addons"> <span class="tag is-dark is-size-7">doi</span> <span class="tag is-light is-size-7"><a class="" href="https://doi.org/10.1109/MSP.2020.2975749">10.1109/MSP.2020.2975749 <i class="fa fa-external-link" aria-hidden="true"></i></a></span> </div> </div> </div> <p class="title is-5 mathjax"> Federated Learning: Challenges, Methods, and Future Directions </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Li%2C+T">Tian Li</a>, <a href="/search/cs?searchtype=author&amp;query=Sahu%2C+A+K">Anit Kumar Sahu</a>, <a href="/search/cs?searchtype=author&amp;query=Talwalkar%2C+A">Ameet Talwalkar</a>, <a href="/search/cs?searchtype=author&amp;query=Smith%2C+V">Virginia Smith</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="1908.07873v1-abstract-short" style="display: inline;"> Federated learning involves training statistical models over remote devices or siloed data centers, such as mobile phones or hospitals, while keeping data localized. Training in heterogeneous and potentially massive networks introduces novel challenges that require a fundamental departure from standard approaches for large-scale machine learning, distributed optimization, and privacy-preserving da&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('1908.07873v1-abstract-full').style.display = 'inline'; document.getElementById('1908.07873v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="1908.07873v1-abstract-full" style="display: none;"> Federated learning involves training statistical models over remote devices or siloed data centers, such as mobile phones or hospitals, while keeping data localized. Training in heterogeneous and potentially massive networks introduces novel challenges that require a fundamental departure from standard approaches for large-scale machine learning, distributed optimization, and privacy-preserving data analysis. In this article, we discuss the unique characteristics and challenges of federated learning, provide a broad overview of current approaches, and outline several directions of future work that are relevant to a wide range of research communities. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('1908.07873v1-abstract-full').style.display = 'none'; document.getElementById('1908.07873v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 21 August, 2019; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> August 2019. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/1906.11813">arXiv:1906.11813</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/1906.11813">pdf</a>, <a href="https://arxiv.org/ps/1906.11813">ps</a>, <a href="https://arxiv.org/format/1906.11813">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">stat.ML</span> </div> </div> <p class="title is-5 mathjax"> Learning Fair Representations for Kernel Models </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Tan%2C+Z">Zilong Tan</a>, <a href="/search/cs?searchtype=author&amp;query=Yeom%2C+S">Samuel Yeom</a>, <a href="/search/cs?searchtype=author&amp;query=Fredrikson%2C+M">Matt Fredrikson</a>, <a href="/search/cs?searchtype=author&amp;query=Talwalkar%2C+A">Ameet Talwalkar</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="1906.11813v2-abstract-short" style="display: inline;"> Fair representations are a powerful tool for establishing criteria like statistical parity, proxy non-discrimination, and equality of opportunity in learned models. Existing techniques for learning these representations are typically model-agnostic, as they preprocess the original data such that the output satisfies some fairness criterion, and can be used with arbitrary learning methods. In contr&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('1906.11813v2-abstract-full').style.display = 'inline'; document.getElementById('1906.11813v2-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="1906.11813v2-abstract-full" style="display: none;"> Fair representations are a powerful tool for establishing criteria like statistical parity, proxy non-discrimination, and equality of opportunity in learned models. Existing techniques for learning these representations are typically model-agnostic, as they preprocess the original data such that the output satisfies some fairness criterion, and can be used with arbitrary learning methods. In contrast, we demonstrate the promise of learning a model-aware fair representation, focusing on kernel-based models. We leverage the classical Sufficient Dimension Reduction (SDR) framework to construct representations as subspaces of the reproducing kernel Hilbert space (RKHS), whose member functions are guaranteed to satisfy fairness. Our method supports several fairness criteria, continuous and discrete data, and multiple protected attributes. We further show how to calibrate the accuracy tradeoff by characterizing it in terms of the principal angles between subspaces of the RKHS. Finally, we apply our approach to obtain the first Fair Gaussian Process (FGP) prior for fair Bayesian learning, and show that it is competitive with, and in some cases outperforms, state-of-the-art methods on real data. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('1906.11813v2-abstract-full').style.display = 'none'; document.getElementById('1906.11813v2-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 20 January, 2020; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 27 June, 2019; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> June 2019. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">The 23rd International Conference on Artificial Intelligence and Statistics (AISTATS 2020)</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/1906.02717">arXiv:1906.02717</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/1906.02717">pdf</a>, <a href="https://arxiv.org/format/1906.02717">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">stat.ML</span> </div> </div> <p class="title is-5 mathjax"> Adaptive Gradient-Based Meta-Learning Methods </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Khodak%2C+M">Mikhail Khodak</a>, <a href="/search/cs?searchtype=author&amp;query=Balcan%2C+M">Maria-Florina Balcan</a>, <a href="/search/cs?searchtype=author&amp;query=Talwalkar%2C+A">Ameet Talwalkar</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="1906.02717v3-abstract-short" style="display: inline;"> We build a theoretical framework for designing and understanding practical meta-learning methods that integrates sophisticated formalizations of task-similarity with the extensive literature on online convex optimization and sequential prediction algorithms. Our approach enables the task-similarity to be learned adaptively, provides sharper transfer-risk bounds in the setting of statistical learni&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('1906.02717v3-abstract-full').style.display = 'inline'; document.getElementById('1906.02717v3-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="1906.02717v3-abstract-full" style="display: none;"> We build a theoretical framework for designing and understanding practical meta-learning methods that integrates sophisticated formalizations of task-similarity with the extensive literature on online convex optimization and sequential prediction algorithms. Our approach enables the task-similarity to be learned adaptively, provides sharper transfer-risk bounds in the setting of statistical learning-to-learn, and leads to straightforward derivations of average-case regret bounds for efficient algorithms in settings where the task-environment changes dynamically or the tasks share a certain geometric structure. We use our theory to modify several popular meta-learning algorithms and improve their meta-test-time performance on standard problems in few-shot learning and federated learning. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('1906.02717v3-abstract-full').style.display = 'none'; document.getElementById('1906.02717v3-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 6 December, 2019; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 6 June, 2019; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> June 2019. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">NeurIPS 2019</span> </p> </li> </ol> <nav class="pagination is-small is-centered breathe-horizontal" role="navigation" aria-label="pagination"> <a href="" class="pagination-previous is-invisible">Previous </a> <a href="/search/?searchtype=author&amp;query=Talwalkar%2C+A&amp;start=50" class="pagination-next" >Next </a> <ul class="pagination-list"> <li> <a href="/search/?searchtype=author&amp;query=Talwalkar%2C+A&amp;start=0" class="pagination-link is-current" aria-label="Goto page 1">1 </a> </li> <li> <a href="/search/?searchtype=author&amp;query=Talwalkar%2C+A&amp;start=50" class="pagination-link " aria-label="Page 2" aria-current="page">2 </a> </li> </ul> </nav> <div class="is-hidden-tablet"> <!-- feedback for mobile only --> <span class="help" style="display: inline-block;"><a href="https://github.com/arXiv/arxiv-search/releases">Search v0.5.6 released 2020-02-24</a>&nbsp;&nbsp;</span> </div> </div> </main> <footer> <div class="columns is-desktop" role="navigation" aria-label="Secondary"> <!-- MetaColumn 1 --> <div class="column"> <div class="columns"> <div class="column"> <ul class="nav-spaced"> <li><a href="https://info.arxiv.org/about">About</a></li> <li><a href="https://info.arxiv.org/help">Help</a></li> </ul> </div> <div class="column"> <ul class="nav-spaced"> <li> <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 512 512" class="icon filter-black" role="presentation"><title>contact arXiv</title><desc>Click here to contact arXiv</desc><path d="M502.3 190.8c3.9-3.1 9.7-.2 9.7 4.7V400c0 26.5-21.5 48-48 48H48c-26.5 0-48-21.5-48-48V195.6c0-5 5.7-7.8 9.7-4.7 22.4 17.4 52.1 39.5 154.1 113.6 21.1 15.4 56.7 47.8 92.2 47.6 35.7.3 72-32.8 92.3-47.6 102-74.1 131.6-96.3 154-113.7zM256 320c23.2.4 56.6-29.2 73.4-41.4 132.7-96.3 142.8-104.7 173.4-128.7 5.8-4.5 9.2-11.5 9.2-18.9v-19c0-26.5-21.5-48-48-48H48C21.5 64 0 85.5 0 112v19c0 7.4 3.4 14.3 9.2 18.9 30.6 23.9 40.7 32.4 173.4 128.7 16.8 12.2 50.2 41.8 73.4 41.4z"/></svg> <a href="https://info.arxiv.org/help/contact.html"> Contact</a> </li> <li> <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 512 512" class="icon filter-black" role="presentation"><title>subscribe to arXiv mailings</title><desc>Click here to subscribe</desc><path d="M476 3.2L12.5 270.6c-18.1 10.4-15.8 35.6 2.2 43.2L121 358.4l287.3-253.2c5.5-4.9 13.3 2.6 8.6 8.3L176 407v80.5c0 23.6 28.5 32.9 42.5 15.8L282 426l124.6 52.2c14.2 6 30.4-2.9 33-18.2l72-432C515 7.8 493.3-6.8 476 3.2z"/></svg> <a href="https://info.arxiv.org/help/subscribe"> Subscribe</a> </li> </ul> </div> </div> </div> <!-- end MetaColumn 1 --> <!-- MetaColumn 2 --> <div class="column"> <div class="columns"> <div class="column"> <ul class="nav-spaced"> <li><a href="https://info.arxiv.org/help/license/index.html">Copyright</a></li> <li><a href="https://info.arxiv.org/help/policies/privacy_policy.html">Privacy Policy</a></li> </ul> </div> <div class="column sorry-app-links"> <ul class="nav-spaced"> <li><a href="https://info.arxiv.org/help/web_accessibility.html">Web Accessibility Assistance</a></li> <li> <p class="help"> <a class="a11y-main-link" href="https://status.arxiv.org" target="_blank">arXiv Operational Status <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 256 512" class="icon filter-dark_grey" role="presentation"><path d="M224.3 273l-136 136c-9.4 9.4-24.6 9.4-33.9 0l-22.6-22.6c-9.4-9.4-9.4-24.6 0-33.9l96.4-96.4-96.4-96.4c-9.4-9.4-9.4-24.6 0-33.9L54.3 103c9.4-9.4 24.6-9.4 33.9 0l136 136c9.5 9.4 9.5 24.6.1 34z"/></svg></a><br> Get status notifications via <a class="is-link" href="https://subscribe.sorryapp.com/24846f03/email/new" target="_blank"><svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 512 512" class="icon filter-black" role="presentation"><path d="M502.3 190.8c3.9-3.1 9.7-.2 9.7 4.7V400c0 26.5-21.5 48-48 48H48c-26.5 0-48-21.5-48-48V195.6c0-5 5.7-7.8 9.7-4.7 22.4 17.4 52.1 39.5 154.1 113.6 21.1 15.4 56.7 47.8 92.2 47.6 35.7.3 72-32.8 92.3-47.6 102-74.1 131.6-96.3 154-113.7zM256 320c23.2.4 56.6-29.2 73.4-41.4 132.7-96.3 142.8-104.7 173.4-128.7 5.8-4.5 9.2-11.5 9.2-18.9v-19c0-26.5-21.5-48-48-48H48C21.5 64 0 85.5 0 112v19c0 7.4 3.4 14.3 9.2 18.9 30.6 23.9 40.7 32.4 173.4 128.7 16.8 12.2 50.2 41.8 73.4 41.4z"/></svg>email</a> or <a class="is-link" href="https://subscribe.sorryapp.com/24846f03/slack/new" target="_blank"><svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 448 512" class="icon filter-black" role="presentation"><path d="M94.12 315.1c0 25.9-21.16 47.06-47.06 47.06S0 341 0 315.1c0-25.9 21.16-47.06 47.06-47.06h47.06v47.06zm23.72 0c0-25.9 21.16-47.06 47.06-47.06s47.06 21.16 47.06 47.06v117.84c0 25.9-21.16 47.06-47.06 47.06s-47.06-21.16-47.06-47.06V315.1zm47.06-188.98c-25.9 0-47.06-21.16-47.06-47.06S139 32 164.9 32s47.06 21.16 47.06 47.06v47.06H164.9zm0 23.72c25.9 0 47.06 21.16 47.06 47.06s-21.16 47.06-47.06 47.06H47.06C21.16 243.96 0 222.8 0 196.9s21.16-47.06 47.06-47.06H164.9zm188.98 47.06c0-25.9 21.16-47.06 47.06-47.06 25.9 0 47.06 21.16 47.06 47.06s-21.16 47.06-47.06 47.06h-47.06V196.9zm-23.72 0c0 25.9-21.16 47.06-47.06 47.06-25.9 0-47.06-21.16-47.06-47.06V79.06c0-25.9 21.16-47.06 47.06-47.06 25.9 0 47.06 21.16 47.06 47.06V196.9zM283.1 385.88c25.9 0 47.06 21.16 47.06 47.06 0 25.9-21.16 47.06-47.06 47.06-25.9 0-47.06-21.16-47.06-47.06v-47.06h47.06zm0-23.72c-25.9 0-47.06-21.16-47.06-47.06 0-25.9 21.16-47.06 47.06-47.06h117.84c25.9 0 47.06 21.16 47.06 47.06 0 25.9-21.16 47.06-47.06 47.06H283.1z"/></svg>slack</a> </p> </li> </ul> </div> </div> </div> <!-- end MetaColumn 2 --> </div> </footer> <script src="https://static.arxiv.org/static/base/1.0.0a5/js/member_acknowledgement.js"></script> </body> </html>

Pages: 1 2 3 4 5 6 7 8 9 10