Search | arXiv e-print repository

Showing 1–50 of 132 results for author: Wu, Z S
Searching in archive cs. Search in all archives.

1. arXiv:2410.17095 [pdf, other] cs.CR
Inferentially-Private Private Information
Authors: Shuaiqi Wang, Shuran Zheng, Zinan Lin, Giulia Fanti, Zhiwei Steven Wu
Abstract: Information disclosure can compromise privacy when revealed information is correlated with private information.
We consider the notion of inferential privacy, which measures privacy leakage by bounding the inferential power a Bayesian adversary can gain by observing a released signal. Our goal is to devise an inferentially-private private information structure that maximizes the informativeness of the released signal, following the Blackwell ordering principle, while adhering to inferential privacy constraints. To achieve this, we devise an efficient release mechanism that achieves the inferentially-private Blackwell optimal private information structure for the setting where the private information is binary. Additionally, we propose a programming approach to compute the optimal structure for general cases given the utility function. The design of our mechanisms builds on our geometric characterization of the Blackwell-optimal disclosure mechanisms under privacy constraints, which may be of independent interest.
Submitted 22 October, 2024; originally announced October 2024.

2. arXiv:2410.02879 [pdf, other] cs.CL
Position: LLM Unlearning Benchmarks are Weak Measures of Progress
Authors: Pratiksha Thaker, Shengyuan Hu, Neil Kale, Yash Maurya, Zhiwei Steven Wu, Virginia Smith
Abstract: Unlearning methods have the potential to improve the privacy and safety of large language models (LLMs) by removing sensitive or harmful information post hoc. The LLM unlearning research community has increasingly turned toward empirical benchmarks to assess the effectiveness of such methods.
In this paper, we find that existing benchmarks provide an overly optimistic and potentially misleading view on the effectiveness of candidate unlearning methods. By introducing simple, benign modifications to a number of popular benchmarks, we expose instances where supposedly unlearned information remains accessible, or where the unlearning process has degraded the model's performance on retained information to a much greater extent than indicated by the original benchmark. We identify that existing benchmarks are particularly vulnerable to modifications that introduce even loose dependencies between the forget and retain information. Further, we show that ambiguity in unlearning targets in existing benchmarks can easily lead to the design of methods that overfit to the given test queries. Based on our findings, we urge the community to be cautious when interpreting benchmark results as reliable measures of progress, and we provide several recommendations to guide future LLM unlearning research.
Submitted 3 October, 2024; originally announced October 2024.

3. arXiv:2407.21057 [pdf, other] cs.CL, cs.AI, cs.LG
Multi-group Uncertainty Quantification for Long-form Text Generation
Authors: Terrance Liu, Zhiwei Steven Wu
Abstract: While large language models are rapidly moving towards consumer-facing applications, they are often still prone to factual errors and hallucinations. In order to reduce the potential harms that may come from these errors, it is important for users to know to what extent they can trust an LLM when it makes a factual claim. To this end, we study the problem of uncertainty quantification of factual correctness in long-form natural language generation. Given some output from a large language model, we study both uncertainty at the level of individual claims contained within the output (via calibration) and uncertainty across the entire output itself (via conformal prediction). Moreover, we invoke multicalibration and multivalid conformal prediction to ensure that such uncertainty guarantees are valid both marginally and across distinct groups of prompts. Using the task of biography generation, we demonstrate empirically that having access to and making use of additional group attributes for each prompt improves both overall and group-wise performance. As the problems of calibration, conformal prediction, and their multi-group counterparts have not been extensively explored previously in the context of long-form text generation, we consider these empirical results to form a benchmark for this setting.
Submitted 24 July, 2024; originally announced July 2024.
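
The claim-level and output-level uncertainty tools named in this abstract are standard and easy to sketch. Below is a minimal, illustrative sketch (not the paper's multicalibrated or multivalid procedures) of the two ingredients: uniform-mass binning to calibrate per-claim confidence scores, and split conformal prediction to turn per-output nonconformity scores into a marginal coverage guarantee. The function names and score definitions are assumptions for illustration only.

```python
import numpy as np

def uniform_mass_bin_calibrate(scores, labels, n_bins=10):
    """Claim-level calibration: map raw confidence scores for individual claims
    to the empirical correctness rate of their (equal-mass) score bin."""
    scores, labels = np.asarray(scores, float), np.asarray(labels, float)
    edges = np.quantile(scores, np.linspace(0, 1, n_bins + 1))[1:-1]  # interior bin edges
    bin_ids = np.digitize(scores, edges)
    rates = np.array([labels[bin_ids == b].mean() if np.any(bin_ids == b) else 0.5
                      for b in range(n_bins)])
    return lambda s: rates[np.digitize(np.asarray(s, float), edges)]

def split_conformal_threshold(nonconformity, alpha=0.1):
    """Output-level uncertainty: finite-sample-corrected (1 - alpha) quantile of
    calibration nonconformity scores. Accepting a new output when its score is
    at most this threshold gives marginal coverage of 1 - alpha."""
    s = np.sort(np.asarray(nonconformity, float))
    n = len(s)
    k = int(np.ceil((n + 1) * (1 - alpha)))
    return s[k - 1] if k <= n else np.inf  # +inf when n is too small for level alpha
```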
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2407.21057v1-abstract-full').style.display = 'none'; document.getElementById('2407.21057v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 24 July, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> July 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2407.04776">arXiv:2407.04776</a> <span> [<a href="https://arxiv.org/pdf/2407.04776">pdf</a>, <a href="https://arxiv.org/format/2407.04776">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computers and Society">cs.CY</span> </div> </div> <p class="title is-5 mathjax"> Quantifying Privacy Risks of Public Statistics to Residents of Subsidized Housing </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Steed%2C+R">Ryan Steed</a>, <a href="/search/cs?searchtype=author&query=Qing%2C+D">Diana Qing</a>, <a href="/search/cs?searchtype=author&query=Wu%2C+Z+S">Zhiwei Steven Wu</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2407.04776v1-abstract-short" style="display: inline;"> As the U.S. Census Bureau implements its controversial new disclosure avoidance system, researchers and policymakers debate the necessity of new privacy protections for public statistics. With experiments on both published statistics and synthetic data, we explore a particular privacy concern: respondents in subsidized housing may deliberately not mention unauthorized children and other household… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2407.04776v1-abstract-full').style.display = 'inline'; document.getElementById('2407.04776v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2407.04776v1-abstract-full" style="display: none;"> As the U.S. Census Bureau implements its controversial new disclosure avoidance system, researchers and policymakers debate the necessity of new privacy protections for public statistics. With experiments on both published statistics and synthetic data, we explore a particular privacy concern: respondents in subsidized housing may deliberately not mention unauthorized children and other household members for fear of being evicted. By combining public statistics from the Decennial Census and the Department of Housing and Urban Development, we demonstrate a simple, inexpensive reconstruction attack that could identify subsidized households living in violation of occupancy guidelines in 2010. Experiments on synthetic data suggest that a random swapping mechanism similar to the Census Bureau's 2010 disclosure avoidance measures does not significantly reduce the precision of this attack, while a differentially private mechanism similar to the 2020 disclosure avoidance system does. Our results provide a valuable example for policymakers seeking a trustworthy, accurate census. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2407.04776v1-abstract-full').style.display = 'none'; document.getElementById('2407.04776v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 5 July, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> July 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2406.13356">arXiv:2406.13356</a> <span> [<a href="https://arxiv.org/pdf/2406.13356">pdf</a>, <a href="https://arxiv.org/format/2406.13356">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> Jogging the Memory of Unlearned LLMs Through Targeted Relearning Attacks </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Hu%2C+S">Shengyuan Hu</a>, <a href="/search/cs?searchtype=author&query=Fu%2C+Y">Yiwei Fu</a>, <a href="/search/cs?searchtype=author&query=Wu%2C+Z+S">Zhiwei Steven Wu</a>, <a href="/search/cs?searchtype=author&query=Smith%2C+V">Virginia Smith</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2406.13356v3-abstract-short" style="display: inline;"> Machine unlearning is a promising approach to mitigate undesirable memorization of training data in LLMs. However, in this work we show that existing approaches for unlearning in LLMs are surprisingly susceptible to a simple set of targeted relearning attacks. With access to only a small and potentially loosely related set of data, we find that we can "jog" the memory of unlearned models to revers… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2406.13356v3-abstract-full').style.display = 'inline'; document.getElementById('2406.13356v3-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2406.13356v3-abstract-full" style="display: none;"> Machine unlearning is a promising approach to mitigate undesirable memorization of training data in LLMs. However, in this work we show that existing approaches for unlearning in LLMs are surprisingly susceptible to a simple set of targeted relearning attacks. With access to only a small and potentially loosely related set of data, we find that we can "jog" the memory of unlearned models to reverse the effects of unlearning. For example, we show that relearning on public medical articles can lead an unlearned LLM to output harmful knowledge about bioweapons, and relearning general wiki information about the book series Harry Potter can force the model to output verbatim memorized text. We formalize this unlearning-relearning pipeline, explore the attack across three popular unlearning benchmarks, and discuss future directions and guidelines that result from our study. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2406.13356v3-abstract-full').style.display = 'none'; document.getElementById('2406.13356v3-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 8 October, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 19 June, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> June 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">26 pages, 5 figures, 7 tables</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2406.04219">arXiv:2406.04219</a> <span> [<a href="https://arxiv.org/pdf/2406.04219">pdf</a>, <a href="https://arxiv.org/format/2406.04219">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> Multi-Agent Imitation Learning: Value is Easy, Regret is Hard </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Tang%2C+J">Jingwu Tang</a>, <a href="/search/cs?searchtype=author&query=Swamy%2C+G">Gokul Swamy</a>, <a href="/search/cs?searchtype=author&query=Fang%2C+F">Fei Fang</a>, <a href="/search/cs?searchtype=author&query=Wu%2C+Z+S">Zhiwei Steven Wu</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2406.04219v2-abstract-short" style="display: inline;"> We study a multi-agent imitation learning (MAIL) problem where we take the perspective of a learner attempting to coordinate a group of agents based on demonstrations of an expert doing so. Most prior work in MAIL essentially reduces the problem to matching the behavior of the expert within the support of the demonstrations. While doing so is sufficient to drive the value gap between the learner a… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2406.04219v2-abstract-full').style.display = 'inline'; document.getElementById('2406.04219v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2406.04219v2-abstract-full" style="display: none;"> We study a multi-agent imitation learning (MAIL) problem where we take the perspective of a learner attempting to coordinate a group of agents based on demonstrations of an expert doing so. Most prior work in MAIL essentially reduces the problem to matching the behavior of the expert within the support of the demonstrations. While doing so is sufficient to drive the value gap between the learner and the expert to zero under the assumption that agents are non-strategic, it does not guarantee robustness to deviations by strategic agents. Intuitively, this is because strategic deviations can depend on a counterfactual quantity: the coordinator's recommendations outside of the state distribution their recommendations induce. In response, we initiate the study of an alternative objective for MAIL in Markov Games we term the regret gap that explicitly accounts for potential deviations by agents in the group. 
We first perform an in-depth exploration of the relationship between the value and regret gaps. First, we show that while the value gap can be efficiently minimized via a direct extension of single-agent IL algorithms, even value equivalence can lead to an arbitrarily large regret gap. This implies that achieving regret equivalence is harder than achieving value equivalence in MAIL. We then provide a pair of efficient reductions to no-regret online convex optimization that are capable of minimizing the regret gap (a) under a coverage assumption on the expert (MALICE) or (b) with access to a queryable expert (BLADES).
Submitted 25 June, 2024; v1 submitted 6 June, 2024; originally announced June 2024.

7. arXiv:2406.01933 [pdf, ps, other] stat.ML, cs.LG, math.ST, stat.ME
Orthogonal Causal Calibration
Authors: Justin Whitehouse, Christopher Jung, Vasilis Syrgkanis, Bryan Wilder, Zhiwei Steven Wu
Abstract: Estimates of causal parameters such as conditional average treatment effects and conditional quantile treatment effects play an important role in real-world decision making. Given this importance, one should ensure these estimators are calibrated.
While there is a rich literature on calibrating estimators of non-causal parameters, very few methods have been derived for calibrating estimators of causal parameters, or more generally estimators of quantities involving nuisance parameters. In this work, we provide a general framework for calibrating predictors involving nuisance estimation. We consider a notion of calibration defined with respect to an arbitrary, nuisance-dependent loss $\ell$, under which we say an estimator $\theta$ is calibrated if its predictions cannot be changed on any level set to decrease loss. We prove generic upper bounds on the calibration error of any causal parameter estimate $\theta$ with respect to any loss $\ell$ using a concept called Neyman Orthogonality. Our bounds involve two decoupled terms - one measuring the error in estimating the unknown nuisance parameters, and the other representing the calibration error in a hypothetical world where the learned nuisance estimates were true. We use our bound to analyze the convergence of two sample splitting algorithms for causal calibration. One algorithm, which applies to universally orthogonalizable loss functions, transforms the data into generalized pseudo-outcomes and applies an off-the-shelf calibration procedure. The other algorithm, which applies to conditionally orthogonalizable loss functions, extends the classical uniform mass binning algorithm to include nuisance estimation. Our results are exceedingly general, showing that essentially any existing calibration algorithm can be used in causal settings, with additional loss only arising from errors in nuisance estimation.
Submitted 3 June, 2024; originally announced June 2024.
Comments: 44 pages
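
The first of the two sample-splitting recipes mentioned above (pseudo-outcomes plus an off-the-shelf calibrator) is simple to sketch for the CATE case. The snippet below is an illustrative sketch under stated assumptions, not the paper's exact algorithm: it forms doubly-robust (AIPW) pseudo-outcomes from cross-fitted nuisance estimates and then calibrates a CATE estimator's predictions against them with isotonic regression. The use of isotonic regression, and the variable and function names, are assumptions for illustration.

```python
import numpy as np
from sklearn.isotonic import IsotonicRegression

def aipw_pseudo_outcomes(y, t, mu1, mu0, e):
    """Doubly-robust (AIPW) pseudo-outcomes: their conditional mean given X equals
    the CATE when the nuisances are correct. y: outcomes, t: binary treatment,
    mu1/mu0: cross-fitted outcome-model predictions, e: cross-fitted propensities."""
    y, t, mu1, mu0, e = map(np.asarray, (y, t, mu1, mu0, e))
    return mu1 - mu0 + t * (y - mu1) / e - (1 - t) * (y - mu0) / (1 - e)

def calibrate_cate(tau_hat_cal, pseudo_cal):
    """Off-the-shelf calibration step: regress pseudo-outcomes on the raw CATE
    predictions with isotonic regression; the fitted map is the calibrated predictor."""
    iso = IsotonicRegression(out_of_bounds="clip")
    iso.fit(np.asarray(tau_hat_cal), np.asarray(pseudo_cal))
    return iso.predict  # apply to raw CATE predictions on fresh data
```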
</p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">44 pages</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2406.00661">arXiv:2406.00661</a> <span> [<a href="https://arxiv.org/pdf/2406.00661">pdf</a>, <a href="https://arxiv.org/format/2406.00661">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> </div> </div> <p class="title is-5 mathjax"> Bridging Multicalibration and Out-of-distribution Generalization Beyond Covariate Shift </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Wu%2C+J">Jiayun Wu</a>, <a href="/search/cs?searchtype=author&query=Liu%2C+J">Jiashuo Liu</a>, <a href="/search/cs?searchtype=author&query=Cui%2C+P">Peng Cui</a>, <a href="/search/cs?searchtype=author&query=Wu%2C+Z+S">Zhiwei Steven Wu</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2406.00661v1-abstract-short" style="display: inline;"> We establish a new model-agnostic optimization framework for out-of-distribution generalization via multicalibration, a criterion that ensures a predictor is calibrated across a family of overlapping groups. Multicalibration is shown to be associated with robustness of statistical inference under covariate shift. We further establish a link between multicalibration and robustness for prediction ta… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2406.00661v1-abstract-full').style.display = 'inline'; document.getElementById('2406.00661v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2406.00661v1-abstract-full" style="display: none;"> We establish a new model-agnostic optimization framework for out-of-distribution generalization via multicalibration, a criterion that ensures a predictor is calibrated across a family of overlapping groups. Multicalibration is shown to be associated with robustness of statistical inference under covariate shift. We further establish a link between multicalibration and robustness for prediction tasks both under and beyond covariate shift. We accomplish this by extending multicalibration to incorporate grouping functions that consider covariates and labels jointly. This leads to an equivalence of the extended multicalibration and invariance, an objective for robust learning in existence of concept shift. We show a linear structure of the grouping function class spanned by density ratios, resulting in a unifying framework for robust learning by designing specific grouping functions. We propose MC-Pseudolabel, a post-processing algorithm to achieve both extended multicalibration and out-of-distribution generalization. The algorithm, with lightweight hyperparameters and optimization through a series of supervised regression steps, achieves superior performance on real-world datasets with distribution shift. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2406.00661v1-abstract-full').style.display = 'none'; document.getElementById('2406.00661v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 2 June, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> June 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2405.20272">arXiv:2405.20272</a> <span> [<a href="https://arxiv.org/pdf/2405.20272">pdf</a>, <a href="https://arxiv.org/format/2405.20272">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Cryptography and Security">cs.CR</span> </div> </div> <p class="title is-5 mathjax"> Reconstruction Attacks on Machine Unlearning: Simple Models are Vulnerable </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Bertran%2C+M">Martin Bertran</a>, <a href="/search/cs?searchtype=author&query=Tang%2C+S">Shuai Tang</a>, <a href="/search/cs?searchtype=author&query=Kearns%2C+M">Michael Kearns</a>, <a href="/search/cs?searchtype=author&query=Morgenstern%2C+J">Jamie Morgenstern</a>, <a href="/search/cs?searchtype=author&query=Roth%2C+A">Aaron Roth</a>, <a href="/search/cs?searchtype=author&query=Wu%2C+Z+S">Zhiwei Steven Wu</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2405.20272v1-abstract-short" style="display: inline;"> Machine unlearning is motivated by desire for data autonomy: a person can request to have their data's influence removed from deployed models, and those models should be updated as if they were retrained without the person's data. We show that, counter-intuitively, these updates expose individuals to high-accuracy reconstruction attacks which allow the attacker to recover their data in its entiret… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2405.20272v1-abstract-full').style.display = 'inline'; document.getElementById('2405.20272v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2405.20272v1-abstract-full" style="display: none;"> Machine unlearning is motivated by desire for data autonomy: a person can request to have their data's influence removed from deployed models, and those models should be updated as if they were retrained without the person's data. We show that, counter-intuitively, these updates expose individuals to high-accuracy reconstruction attacks which allow the attacker to recover their data in its entirety, even when the original models are so simple that privacy risk might not otherwise have been a concern. We show how to mount a near-perfect attack on the deleted data point from linear regression models. We then generalize our attack to other loss functions and architectures, and empirically demonstrate the effectiveness of our attacks across a wide range of datasets (capturing both tabular and image data). 
Our work highlights that privacy risk is significant even for extremely simple model classes when individuals can request deletion of their data from the model.
Submitted 30 May, 2024; originally announced May 2024.
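
To see why exact unlearning of even a linear model can leak the deleted point, consider ordinary least squares. The toy numpy simulation below is an illustration of the underlying leakage, not the paper's attack: by the Sherman-Morrison identity, removing one point shifts the OLS solution by a vector proportional to (X^T X)^{-1} x_del, so an observer who sees both models and the pre-deletion Gram matrix recovers the deleted features up to scale. The data and attacker knowledge here are assumptions made for the demo.

```python
import numpy as np

rng = np.random.default_rng(0)
n, d = 200, 5
X = rng.normal(size=(n, d))
y = X @ rng.normal(size=d) + 0.1 * rng.normal(size=n)

def ols(X, y):
    return np.linalg.solve(X.T @ X, X.T @ y)

theta_before = ols(X, y)            # model released before the deletion request
theta_after = ols(X[:-1], y[:-1])   # model after exact retraining without the last row

# Sherman-Morrison: theta_after - theta_before
#   = (X'X)^{-1} x_del * (x_del' theta_before - y_del) / (1 - h),  h = leverage of x_del,
# so multiplying the parameter update by the pre-deletion Gram matrix recovers x_del up to scale.
gram = X.T @ X
recovered_direction = gram @ (theta_after - theta_before)
x_deleted = X[-1]

cos = recovered_direction @ x_deleted / (
    np.linalg.norm(recovered_direction) * np.linalg.norm(x_deleted))
print(f"|cosine similarity| between recovered direction and deleted point: {abs(cos):.4f}")  # ~1.0
```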

10. arXiv:2405.19667 [pdf, other] cs.LG, cs.AI
Reconciling Model Multiplicity for Downstream Decision Making
Authors: Ally Yalei Du, Dung Daniel Ngo, Zhiwei Steven Wu
Abstract: We consider the problem of model multiplicity in downstream decision-making, a setting where two predictive models of equivalent accuracy cannot agree on the best-response action for a downstream loss function. We show that even when the two predictive models approximately agree on their individual predictions almost everywhere, it is still possible for their induced best-response actions to differ on a substantial portion of the population. We address this issue by proposing a framework that calibrates the predictive models with regard to both the downstream decision-making problem and the individual probability prediction. Specifically, leveraging tools from multi-calibration, we provide an algorithm that, at each time-step, first reconciles the differences in individual probability prediction, then calibrates the updated models such that they are indistinguishable from the true probability distribution to the decision-maker. We extend our results to the setting where one does not have direct access to the true probability distribution and instead relies on a set of i.i.d. data to be the empirical distribution. Finally, we provide a set of experiments to empirically evaluate our methods: compared to existing work, our proposed algorithm creates a pair of predictive models with both improved downstream decision-making losses and agreement on their best-response actions almost everywhere.
Submitted 29 May, 2024; originally announced May 2024.
Comments: 16 pages main body, 6 figures

11. arXiv:2404.00848 [pdf, other] cs.LG, cs.CY, stat.ME
Predictive Performance Comparison of Decision Policies Under Confounding
Authors: Luke Guerdan, Amanda Coston, Kenneth Holstein, Zhiwei Steven Wu
Abstract: Predictive models are often introduced to decision-making tasks under the rationale that they improve performance over an existing decision-making policy. However, it is challenging to compare predictive performance against an existing decision-making policy that is generally under-specified and dependent on unobservable factors.
These sources of uncertainty are often addressed in practice by making strong assumptions about the data-generating mechanism. In this work, we propose a method to compare the predictive performance of decision policies under a variety of modern identification approaches from the causal inference and off-policy evaluation literatures (e.g., instrumental variable, marginal sensitivity model, proximal variable). Key to our method is the insight that there are regions of uncertainty that we can safely ignore in the policy comparison. We develop a practical approach for finite-sample estimation of regret intervals under no assumptions on the parametric form of the status quo policy. We verify our framework theoretically and via synthetic data experiments. We conclude with a real-world application using our framework to support a pre-deployment evaluation of a proposed modification to a healthcare enrollment policy.
Submitted 11 June, 2024; v1 submitted 31 March, 2024; originally announced April 2024.
Comments: ICML 2024

12. arXiv:2403.05006 [pdf, ps, other] cs.LG, cs.AI, stat.ME, stat.ML
Provable Multi-Party Reinforcement Learning with Diverse Human Feedback
Authors: Huiying Zhong, Zhun Deng, Weijie J. Su, Zhiwei Steven Wu, Linjun Zhang
Abstract: Reinforcement learning with human feedback (RLHF) is an emerging paradigm to align models with human preferences. Typically, RLHF aggregates preferences from multiple individuals who have diverse viewpoints that may conflict with each other.
Our work initiates the theoretical study of multi-party RLHF that explicitly models the diverse preferences of multiple individuals. We show how traditional RLHF approaches can fail since learning a single reward function cannot capture and balance the preferences of multiple individuals. To overcome such limitations, we incorporate meta-learning to learn multiple preferences and adopt different social welfare functions to aggregate the preferences across multiple parties. We focus on the offline learning setting and establish sample complexity bounds, along with efficiency and fairness guarantees, for optimizing diverse social welfare functions such as Nash, Utilitarian, and Leximin welfare functions. Our results show a separation between the sample complexities of multi-party RLHF and traditional single-party RLHF. Furthermore, we consider a reward-free setting, where each individual's preference is no longer consistent with a reward model, and give pessimistic variants of the von Neumann Winner based on offline preference data. Taken together, our work showcases the advantage of multi-party RLHF but also highlights its more demanding statistical complexity.
Submitted 7 March, 2024; originally announced March 2024.
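
The three social welfare functions named in this abstract aggregate per-party values in very different ways, and the chosen aggregator can change which policy wins. The toy snippet below illustrates only the welfare functions themselves, not the paper's offline algorithm; the value matrix is made up for the example.

```python
import numpy as np

# Rows: parties, columns: candidate policies. Entries: each party's estimated value
# for each policy (in the multi-party RLHF setting these would come from learned models).
V = np.array([[1.00, 0.60, 0.70],
              [1.00, 0.60, 0.70],
              [0.05, 0.55, 0.30]])

def utilitarian(v):           # total welfare: sum of party values
    return v.sum(axis=0)

def nash(v, eps=1e-12):       # Nash welfare: product of values (compared via log-sum)
    return np.log(v + eps).sum(axis=0)

def leximin_best(v):          # leximin: lexicographically compare ascending value profiles
    profiles = [tuple(sorted(v[:, j])) for j in range(v.shape[1])]
    return max(range(v.shape[1]), key=profiles.__getitem__)

print("utilitarian picks policy", int(np.argmax(utilitarian(V))))  # 0: ignores the worst-off party
print("Nash picks policy", int(np.argmax(nash(V))))                # 1
print("leximin picks policy", leximin_best(V))                     # 1: protects the minimum value
```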
</p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2403.03329">arXiv:2403.03329</a> <span> [<a href="https://arxiv.org/pdf/2403.03329">pdf</a>, <a href="https://arxiv.org/format/2403.03329">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> </div> </div> <p class="title is-5 mathjax"> Guardrail Baselines for Unlearning in LLMs </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Thaker%2C+P">Pratiksha Thaker</a>, <a href="/search/cs?searchtype=author&query=Maurya%2C+Y">Yash Maurya</a>, <a href="/search/cs?searchtype=author&query=Hu%2C+S">Shengyuan Hu</a>, <a href="/search/cs?searchtype=author&query=Wu%2C+Z+S">Zhiwei Steven Wu</a>, <a href="/search/cs?searchtype=author&query=Smith%2C+V">Virginia Smith</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2403.03329v3-abstract-short" style="display: inline;"> Recent work has demonstrated that finetuning is a promising approach to 'unlearn' concepts from large language models. However, finetuning can be expensive, as it requires both generating a set of examples and running iterations of finetuning to update the model. In this work, we show that simple guardrail-based approaches such as prompting and filtering can achieve unlearning results comparable t… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2403.03329v3-abstract-full').style.display = 'inline'; document.getElementById('2403.03329v3-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2403.03329v3-abstract-full" style="display: none;"> Recent work has demonstrated that finetuning is a promising approach to 'unlearn' concepts from large language models. However, finetuning can be expensive, as it requires both generating a set of examples and running iterations of finetuning to update the model. In this work, we show that simple guardrail-based approaches such as prompting and filtering can achieve unlearning results comparable to finetuning. We recommend that researchers investigate these lightweight baselines when evaluating the performance of more computationally intensive finetuning methods. While we do not claim that methods such as prompting or filtering are universal solutions to the problem of unlearning, our work suggests the need for evaluation metrics that can better separate the power of guardrails vs. finetuning, and highlights scenarios where guardrails expose possible unintended behavior in existing metrics and benchmarks. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2403.03329v3-abstract-full').style.display = 'none'; document.getElementById('2403.03329v3-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 11 June, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 5 March, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> March 2024. 
</p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Preliminary work, accepted to ICLR workshop SeT-LLM 2024</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2402.15872">arXiv:2402.15872</a> <span> [<a href="https://arxiv.org/pdf/2402.15872">pdf</a>, <a href="https://arxiv.org/ps/2402.15872">ps</a>, <a href="https://arxiv.org/format/2402.15872">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Science and Game Theory">cs.GT</span> </div> </div> <p class="title is-5 mathjax"> Differentially Private Bayesian Persuasion </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Pan%2C+Y">Yuqi Pan</a>, <a href="/search/cs?searchtype=author&query=Wu%2C+Z+S">Zhiwei Steven Wu</a>, <a href="/search/cs?searchtype=author&query=Xu%2C+H">Haifeng Xu</a>, <a href="/search/cs?searchtype=author&query=Zheng%2C+S">Shuran Zheng</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2402.15872v1-abstract-short" style="display: inline;"> The tension between persuasion and privacy preservation is common in real-world settings. Online platforms should protect the privacy of web users whose data they collect, even as they seek to disclose information about these data to selling advertising spaces. Similarly, hospitals may share patient data to attract research investments with the obligation to preserve patients' privacy. To deal wit… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2402.15872v1-abstract-full').style.display = 'inline'; document.getElementById('2402.15872v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2402.15872v1-abstract-full" style="display: none;"> The tension between persuasion and privacy preservation is common in real-world settings. Online platforms should protect the privacy of web users whose data they collect, even as they seek to disclose information about these data to selling advertising spaces. Similarly, hospitals may share patient data to attract research investments with the obligation to preserve patients' privacy. To deal with these issues, we develop a framework to study Bayesian persuasion under differential privacy constraints, where the sender must design an optimal signaling scheme for persuasion while guaranteeing the privacy of each agent's private information in the database. To understand how privacy constraints affect information disclosure, we explore two perspectives within Bayesian persuasion: one views the mechanism as releasing a posterior about the private data, while the other views it as sending an action recommendation. The posterior-based formulation helps consider privacy-utility tradeoffs, quantifying how the tightness of privacy constraints impacts the sender's optimal utility. For any instance in a common utility function family and a wide range of privacy levels, a significant constant utility gap can be found between any two of the three conditions: $蔚$-differential privacy constraint, relaxation $(蔚,未)$-differential privacy constraint, and no privacy constraint. 
We further geometrically characterize optimal signaling schemes under different types of constraints ($ε$-differential privacy, $(ε,δ)$-differential privacy and Renyi differential privacy), all of which can be seen as finding concave hulls in constrained posterior regions. Meanwhile, by taking the action-based view of persuasion, we provide polynomial-time algorithms for computing optimal differentially private signaling schemes, as long as a mild homogeneous condition is met. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2402.15872v1-abstract-full').style.display = 'none'; document.getElementById('2402.15872v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 24 February, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2402.08848">arXiv:2402.08848</a> <span> [<a href="https://arxiv.org/pdf/2402.08848">pdf</a>, <a href="https://arxiv.org/format/2402.08848">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> </div> </div> <p class="title is-5 mathjax"> Hybrid Inverse Reinforcement Learning </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Ren%2C+J">Juntao Ren</a>, <a href="/search/cs?searchtype=author&query=Swamy%2C+G">Gokul Swamy</a>, <a href="/search/cs?searchtype=author&query=Wu%2C+Z+S">Zhiwei Steven Wu</a>, <a href="/search/cs?searchtype=author&query=Bagnell%2C+J+A">J. Andrew Bagnell</a>, <a href="/search/cs?searchtype=author&query=Choudhury%2C+S">Sanjiban Choudhury</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2402.08848v2-abstract-short" style="display: inline;"> The inverse reinforcement learning approach to imitation learning is a double-edged sword. On the one hand, it can enable learning from a smaller number of expert demonstrations with more robustness to error compounding than behavioral cloning approaches. On the other hand, it requires that the learner repeatedly solve a computationally expensive reinforcement learning (RL) problem. Often, much of… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2402.08848v2-abstract-full').style.display = 'inline'; document.getElementById('2402.08848v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2402.08848v2-abstract-full" style="display: none;"> The inverse reinforcement learning approach to imitation learning is a double-edged sword. On the one hand, it can enable learning from a smaller number of expert demonstrations with more robustness to error compounding than behavioral cloning approaches. On the other hand, it requires that the learner repeatedly solve a computationally expensive reinforcement learning (RL) problem. Often, much of this computation is wasted searching over policies very dissimilar to the expert's.
In this work, we propose using hybrid RL -- training on a mixture of online and expert data -- to curtail unnecessary exploration. Intuitively, the expert data focuses the learner on good states during training, which reduces the amount of exploration required to compute a strong policy. Notably, such an approach doesn't need the ability to reset the learner to arbitrary states in the environment, a requirement of prior work in efficient inverse RL. More formally, we derive a reduction from inverse RL to expert-competitive RL (rather than globally optimal RL) that allows us to dramatically reduce interaction during the inner policy search loop while maintaining the benefits of the IRL approach. This allows us to derive both model-free and model-based hybrid inverse RL algorithms with strong policy performance guarantees. Empirically, we find that our approaches are significantly more sample efficient than standard inverse RL and several other baselines on a suite of continuous control tasks. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2402.08848v2-abstract-full').style.display = 'none'; document.getElementById('2402.08848v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 4 June, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 13 February, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2402.08576">arXiv:2402.08576</a> <span> [<a href="https://arxiv.org/pdf/2402.08576">pdf</a>, <a href="https://arxiv.org/format/2402.08576">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Science and Game Theory">cs.GT</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> Regret Minimization in Stackelberg Games with Side Information </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Harris%2C+K">Keegan Harris</a>, <a href="/search/cs?searchtype=author&query=Wu%2C+Z+S">Zhiwei Steven Wu</a>, <a href="/search/cs?searchtype=author&query=Balcan%2C+M">Maria-Florina Balcan</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2402.08576v4-abstract-short" style="display: inline;"> Algorithms for playing in Stackelberg games have been deployed in real-world domains including airport security, anti-poaching efforts, and cyber-crime prevention. However, these algorithms often fail to take into consideration the additional information available to each player (e.g. 
traffic patterns, weather conditions, network congestion), which may significantly affect both players' optimal st… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2402.08576v4-abstract-full').style.display = 'inline'; document.getElementById('2402.08576v4-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2402.08576v4-abstract-full" style="display: none;"> Algorithms for playing in Stackelberg games have been deployed in real-world domains including airport security, anti-poaching efforts, and cyber-crime prevention. However, these algorithms often fail to take into consideration the additional information available to each player (e.g. traffic patterns, weather conditions, network congestion), which may significantly affect both players' optimal strategies. We formalize such settings as Stackelberg games with side information, in which both players observe an external context before playing. The leader commits to a (context-dependent) strategy, and the follower best-responds to both the leader's strategy and the context. We focus on the online setting in which a sequence of followers arrive over time, and the context may change from round-to-round. In sharp contrast to the non-contextual version, we show that it is impossible for the leader to achieve no-regret in the full adversarial setting. Motivated by this result, we show that no-regret learning is possible in two natural relaxations: the setting in which the sequence of followers is chosen stochastically and the sequence of contexts is adversarial, and the setting in which contexts are stochastic and follower types are adversarial. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2402.08576v4-abstract-full').style.display = 'none'; document.getElementById('2402.08576v4-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 11 October, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 13 February, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">In the thirty-eighth Conference on Neural Information Processing Systems (NeurIPS 2024)</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2402.02616">arXiv:2402.02616</a> <span> </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> The Virtues of Pessimism in Inverse Reinforcement Learning </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Wu%2C+D">David Wu</a>, <a href="/search/cs?searchtype=author&query=Swamy%2C+G">Gokul Swamy</a>, <a href="/search/cs?searchtype=author&query=Bagnell%2C+J+A">J. 
Andrew Bagnell</a>, <a href="/search/cs?searchtype=author&query=Wu%2C+Z+S">Zhiwei Steven Wu</a>, <a href="/search/cs?searchtype=author&query=Choudhury%2C+S">Sanjiban Choudhury</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2402.02616v2-abstract-short" style="display: inline;"> Inverse Reinforcement Learning (IRL) is a powerful framework for learning complex behaviors from expert demonstrations. However, it traditionally requires repeatedly solving a computationally expensive reinforcement learning (RL) problem in its inner loop. It is desirable to reduce the exploration burden by leveraging expert demonstrations in the inner-loop RL. As an example, recent work resets th… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2402.02616v2-abstract-full').style.display = 'inline'; document.getElementById('2402.02616v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2402.02616v2-abstract-full" style="display: none;"> Inverse Reinforcement Learning (IRL) is a powerful framework for learning complex behaviors from expert demonstrations. However, it traditionally requires repeatedly solving a computationally expensive reinforcement learning (RL) problem in its inner loop. It is desirable to reduce the exploration burden by leveraging expert demonstrations in the inner-loop RL. As an example, recent work resets the learner to expert states in order to inform the learner of high-reward expert states. However, such an approach is infeasible in the real world. In this work, we consider an alternative approach to speeding up the RL subroutine in IRL: \emph{pessimism}, i.e., staying close to the expert's data distribution, instantiated via the use of offline RL algorithms. We formalize a connection between offline RL and IRL, enabling us to use an arbitrary offline RL algorithm to improve the sample efficiency of IRL. We validate our theory experimentally by demonstrating a strong correlation between the efficacy of an offline RL algorithm and how well it works as part of an IRL procedure. By using a strong offline RL algorithm as part of an IRL procedure, we are able to find policies that match expert performance significantly more efficiently than the prior art. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2402.02616v2-abstract-full').style.display = 'none'; document.getElementById('2402.02616v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 8 February, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 4 February, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2024. 
</p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">This paper has been withdrawn by the authors pending edits from other authors</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2401.04056">arXiv:2401.04056</a> <span> [<a href="https://arxiv.org/pdf/2401.04056">pdf</a>, <a href="https://arxiv.org/format/2401.04056">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> A Minimaximalist Approach to Reinforcement Learning from Human Feedback </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Swamy%2C+G">Gokul Swamy</a>, <a href="/search/cs?searchtype=author&query=Dann%2C+C">Christoph Dann</a>, <a href="/search/cs?searchtype=author&query=Kidambi%2C+R">Rahul Kidambi</a>, <a href="/search/cs?searchtype=author&query=Wu%2C+Z+S">Zhiwei Steven Wu</a>, <a href="/search/cs?searchtype=author&query=Agarwal%2C+A">Alekh Agarwal</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2401.04056v2-abstract-short" style="display: inline;"> We present Self-Play Preference Optimization (SPO), an algorithm for reinforcement learning from human feedback. Our approach is minimalist in that it does not require training a reward model nor unstable adversarial training and is therefore rather simple to implement. Our approach is maximalist in that it provably handles non-Markovian, intransitive, and stochastic preferences while being robust… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2401.04056v2-abstract-full').style.display = 'inline'; document.getElementById('2401.04056v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2401.04056v2-abstract-full" style="display: none;"> We present Self-Play Preference Optimization (SPO), an algorithm for reinforcement learning from human feedback. Our approach is minimalist in that it does not require training a reward model nor unstable adversarial training and is therefore rather simple to implement. Our approach is maximalist in that it provably handles non-Markovian, intransitive, and stochastic preferences while being robust to the compounding errors that plague offline approaches to sequential prediction. To achieve the preceding qualities, we build upon the concept of a Minimax Winner (MW), a notion of preference aggregation from the social choice theory literature that frames learning from preferences as a zero-sum game between two policies. By leveraging the symmetry of this game, we prove that rather than using the traditional technique of dueling two policies to compute the MW, we can simply have a single agent play against itself while maintaining strong convergence guarantees. Practically, this corresponds to sampling multiple trajectories from a policy, asking a preference or teacher model to compare them, and then using the proportion of wins as the reward for a particular trajectory. 
We demonstrate that on a suite of continuous control tasks, we are able to learn significantly more efficiently than reward-model based approaches while maintaining robustness to the intransitive and stochastic preferences that frequently occur in practice when aggregating human judgments. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2401.04056v2-abstract-full').style.display = 'none'; document.getElementById('2401.04056v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 13 June, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 8 January, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> January 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2312.16307">arXiv:2312.16307</a> <span> [<a href="https://arxiv.org/pdf/2312.16307">pdf</a>, <a href="https://arxiv.org/format/2312.16307">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Econometrics">econ.EM</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computer Science and Game Theory">cs.GT</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Methodology">stat.ME</span> </div> </div> <p class="title is-5 mathjax"> Incentive-Aware Synthetic Control: Accurate Counterfactual Estimation via Incentivized Exploration </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Ngo%2C+D">Daniel Ngo</a>, <a href="/search/cs?searchtype=author&query=Harris%2C+K">Keegan Harris</a>, <a href="/search/cs?searchtype=author&query=Agarwal%2C+A">Anish Agarwal</a>, <a href="/search/cs?searchtype=author&query=Syrgkanis%2C+V">Vasilis Syrgkanis</a>, <a href="/search/cs?searchtype=author&query=Wu%2C+Z+S">Zhiwei Steven Wu</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2312.16307v2-abstract-short" style="display: inline;"> We consider the setting of synthetic control methods (SCMs), a canonical approach used to estimate the treatment effect on the treated in a panel data setting. We shed light on a frequently overlooked but ubiquitous assumption made in SCMs of "overlap": a treated unit can be written as some combination -- typically, convex or linear combination -- of the units that remain under control. We show th… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2312.16307v2-abstract-full').style.display = 'inline'; document.getElementById('2312.16307v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2312.16307v2-abstract-full" style="display: none;"> We consider the setting of synthetic control methods (SCMs), a canonical approach used to estimate the treatment effect on the treated in a panel data setting. We shed light on a frequently overlooked but ubiquitous assumption made in SCMs of "overlap": a treated unit can be written as some combination -- typically, convex or linear combination -- of the units that remain under control. 
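The "overlap" assumption referenced here, that a treated unit can be approximated by a convex combination of control units, corresponds to the usual synthetic-control weight fit. The sketch below is a generic toy version of that fit under the overlap assumption, not the incentive-aware estimator the paper proposes; the data, step size, and iteration count are made up.

```python
# Rough sketch of fitting convex synthetic-control weights so that the treated
# unit's pre-treatment outcomes are matched by a weighted mix of control units.
import numpy as np

def project_to_simplex(v):
    # Euclidean projection onto the probability simplex (Duchi et al., 2008).
    u = np.sort(v)[::-1]
    css = np.cumsum(u)
    rho = np.nonzero(u * np.arange(1, len(v) + 1) > (css - 1.0))[0][-1]
    theta = (css[rho] - 1.0) / (rho + 1.0)
    return np.maximum(v - theta, 0.0)

def synthetic_control_weights(Y_control, y_treated, steps=2000, lr=0.01):
    # Y_control: (T_pre, n_controls) pre-treatment outcomes of control units.
    # y_treated: (T_pre,) pre-treatment outcomes of the treated unit.
    n = Y_control.shape[1]
    w = np.full(n, 1.0 / n)
    for _ in range(steps):
        grad = Y_control.T @ (Y_control @ w - y_treated)
        w = project_to_simplex(w - lr * grad)   # keep weights convex
    return w

rng = np.random.default_rng(0)
Y_control = rng.normal(size=(20, 5))
true_w = np.array([0.5, 0.3, 0.2, 0.0, 0.0])
y_treated = Y_control @ true_w + 0.01 * rng.normal(size=20)
print(synthetic_control_weights(Y_control, y_treated).round(2))
```

When no such convex combination fits the pre-treatment data well, the overlap assumption the abstract questions is exactly what fails.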
We show that if units select their own interventions, and there is sufficiently large heterogeneity between units that prefer different interventions, overlap will not hold. We address this issue by proposing a framework which incentivizes units with different preferences to take interventions they would not normally consider. Specifically, leveraging tools from information design and online learning, we propose a SCM that incentivizes exploration in panel data settings by providing incentive-compatible intervention recommendations to units. We establish this estimator obtains valid counterfactual estimates without the need for an a priori overlap assumption. We extend our results to the setting of synthetic interventions, where the goal is to produce counterfactual outcomes under all interventions, not just control. Finally, we provide two hypothesis tests for determining whether unit overlap holds for a given panel dataset. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2312.16307v2-abstract-full').style.display = 'none'; document.getElementById('2312.16307v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 13 February, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 26 December, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> December 2023. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2312.15551">arXiv:2312.15551</a> <span> [<a href="https://arxiv.org/pdf/2312.15551">pdf</a>, <a href="https://arxiv.org/format/2312.15551">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Cryptography and Security">cs.CR</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">stat.ML</span> </div> </div> <p class="title is-5 mathjax"> On the Benefits of Public Representations for Private Transfer Learning under Distribution Shift </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Thaker%2C+P">Pratiksha Thaker</a>, <a href="/search/cs?searchtype=author&query=Setlur%2C+A">Amrith Setlur</a>, <a href="/search/cs?searchtype=author&query=Wu%2C+Z+S">Zhiwei Steven Wu</a>, <a href="/search/cs?searchtype=author&query=Smith%2C+V">Virginia Smith</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2312.15551v4-abstract-short" style="display: inline;"> Public pretraining is a promising approach to improve differentially private model training. 
However, recent work has noted that many positive research results studying this paradigm only consider in-distribution tasks, and may not apply to settings where there is distribution shift between the pretraining and finetuning data -- a scenario that is likely when finetuning private tasks due to the se… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2312.15551v4-abstract-full').style.display = 'inline'; document.getElementById('2312.15551v4-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2312.15551v4-abstract-full" style="display: none;"> Public pretraining is a promising approach to improve differentially private model training. However, recent work has noted that many positive research results studying this paradigm only consider in-distribution tasks, and may not apply to settings where there is distribution shift between the pretraining and finetuning data -- a scenario that is likely when finetuning private tasks due to the sensitive nature of the data. In this work, we show empirically across three tasks that even in settings with large distribution shift, where both zero-shot performance from public data and training from scratch with private data give unusably weak results, public features can in fact improve private training accuracy by up to 67\% over private training from scratch. We provide a theoretical explanation for this phenomenon, showing that if the public and private data share a low-dimensional representation, public representations can improve the sample complexity of private training even if it is impossible to learn the private task from the public data alone. Altogether, our results provide evidence that public data can indeed make private training practical in realistic settings of extreme distribution shift. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2312.15551v4-abstract-full').style.display = 'none'; document.getElementById('2312.15551v4-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 1 September, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 24 December, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> December 2023. 
</p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2312.05140">arXiv:2312.05140</a> <span> [<a href="https://arxiv.org/pdf/2312.05140">pdf</a>, <a href="https://arxiv.org/format/2312.05140">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Cryptography and Security">cs.CR</span> </div> </div> <p class="title is-5 mathjax"> Membership Inference Attacks on Diffusion Models via Quantile Regression </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Tang%2C+S">Shuai Tang</a>, <a href="/search/cs?searchtype=author&query=Wu%2C+Z+S">Zhiwei Steven Wu</a>, <a href="/search/cs?searchtype=author&query=Aydore%2C+S">Sergul Aydore</a>, <a href="/search/cs?searchtype=author&query=Kearns%2C+M">Michael Kearns</a>, <a href="/search/cs?searchtype=author&query=Roth%2C+A">Aaron Roth</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2312.05140v1-abstract-short" style="display: inline;"> Recently, diffusion models have become popular tools for image synthesis because of their high-quality outputs. However, like other large-scale models, they may leak private information about their training data. Here, we demonstrate a privacy vulnerability of diffusion models through a \emph{membership inference (MI) attack}, which aims to identify whether a target example belongs to the training… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2312.05140v1-abstract-full').style.display = 'inline'; document.getElementById('2312.05140v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2312.05140v1-abstract-full" style="display: none;"> Recently, diffusion models have become popular tools for image synthesis because of their high-quality outputs. However, like other large-scale models, they may leak private information about their training data. Here, we demonstrate a privacy vulnerability of diffusion models through a \emph{membership inference (MI) attack}, which aims to identify whether a target example belongs to the training set when given the trained diffusion model. Our proposed MI attack learns quantile regression models that predict (a quantile of) the distribution of reconstruction loss on examples not used in training. This allows us to define a granular hypothesis test for determining the membership of a point in the training set, based on thresholding the reconstruction loss of that point using a custom threshold tailored to the example. We also provide a simple bootstrap technique that takes a majority membership prediction over ``a bag of weak attackers'' which improves the accuracy over individual quantile regression models. We show that our attack outperforms the prior state-of-the-art attack while being substantially less computationally expensive -- prior attacks required training multiple ``shadow models'' with the same architecture as the model under attack, whereas our attack requires training only much smaller models. 
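The attack pattern this abstract describes, predicting a per-example quantile of the reconstruction loss from public non-members and then thresholding, can be sketched with off-the-shelf quantile regression. The code below is a hedged toy reconstruction under our own assumptions about features and losses, not the authors' implementation, and it omits the "bag of weak attackers" majority vote mentioned above.

```python
# Toy membership-inference sketch: fit a quantile regressor on non-member
# examples to get a per-example loss threshold, then flag points whose
# reconstruction loss falls below their predicted quantile.
import numpy as np
from sklearn.ensemble import GradientBoostingRegressor

def fit_quantile_attacker(features_nonmember, loss_nonmember, q=0.05):
    # Predicts the q-th quantile of loss for a non-member with these features.
    model = GradientBoostingRegressor(loss="quantile", alpha=q)
    model.fit(features_nonmember, loss_nonmember)
    return model

def predict_membership(model, features, losses):
    thresholds = model.predict(features)
    return losses < thresholds  # True = predicted training-set member

# Synthetic data: loss depends on a feature, and "members" get a small discount.
rng = np.random.default_rng(1)
X_non = rng.normal(size=(500, 3))
loss_non = 1.0 + X_non[:, 0] ** 2 + 0.1 * rng.normal(size=500)
X_test = rng.normal(size=(10, 3))
loss_test = 1.0 + X_test[:, 0] ** 2 - 0.4  # members: systematically lower loss
attacker = fit_quantile_attacker(X_non, loss_non)
print(predict_membership(attacker, X_test, loss_test))  # mostly True
```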
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2312.05140v1-abstract-full').style.display = 'none'; document.getElementById('2312.05140v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 8 December, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> December 2023. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2312.04973">arXiv:2312.04973</a> <span> [<a href="https://arxiv.org/pdf/2312.04973">pdf</a>, <a href="https://arxiv.org/format/2312.04973">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Science and Game Theory">cs.GT</span> </div> </div> <p class="title is-5 mathjax"> Ex-post Individually Rational Bayesian Persuasion </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Zhang%2C+J">Jiahao Zhang</a>, <a href="/search/cs?searchtype=author&query=Zheng%2C+S">Shuran Zheng</a>, <a href="/search/cs?searchtype=author&query=Leme%2C+R+P">Renato Paes Leme</a>, <a href="/search/cs?searchtype=author&query=Wu%2C+Z+S">Zhiwei Steven Wu</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2312.04973v1-abstract-short" style="display: inline;"> The success of Bayesian persuasion relies on the key assumption that the sender will commit to a predetermined information disclosure policy (signaling scheme). However, in practice, it is usually difficult for the receiver to monitor whether the sender sticks to the disclosure policy, which makes the credibility of the sender's disclosure policy questionable. The sender's credibility is particula… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2312.04973v1-abstract-full').style.display = 'inline'; document.getElementById('2312.04973v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2312.04973v1-abstract-full" style="display: none;"> The success of Bayesian persuasion relies on the key assumption that the sender will commit to a predetermined information disclosure policy (signaling scheme). However, in practice, it is usually difficult for the receiver to monitor whether the sender sticks to the disclosure policy, which makes the credibility of the sender's disclosure policy questionable. The sender's credibility is particularly tenuous when there are obvious deviations that benefit the sender. In this work, we identify such a deviation: the sender may be unwilling to send a signal that will lead to a less desirable outcome compared to no information disclosure. We thus propose the notion of ex-post individually rational (ex-post IR) Bayesian persuasion: after observing the state, the sender is never required to send a signal that will make the outcome worse off (compared to no information disclosure). An ex-post IR Bayesian persuasion policy is more likely to be truthfully followed by the sender, and thus more credible for the receiver. Our contribution is threefold. 
Firstly, we demonstrate that the optimal ex-post IR Bayesian persuasion policy can be efficiently computed through a linear program, while also offering geometric characterizations of this optimal policy. Second, we show that surprisingly, for non-trivial classes of games, the imposition of ex-post IR constraints does not affect the sender's expected utility. Finally, we compare ex-post IR Bayesian persuasion to other information disclosure models that ensure different notions of credibility. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2312.04973v1-abstract-full').style.display = 'none'; document.getElementById('2312.04973v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 8 December, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> December 2023. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">21 pages</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2311.14632">arXiv:2311.14632</a> <span> [<a href="https://arxiv.org/pdf/2311.14632">pdf</a>, <a href="https://arxiv.org/format/2311.14632">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Cryptography and Security">cs.CR</span> </div> </div> <p class="title is-5 mathjax"> Differentially Private SGD Without Clipping Bias: An Error-Feedback Approach </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Zhang%2C+X">Xinwei Zhang</a>, <a href="/search/cs?searchtype=author&query=Bu%2C+Z">Zhiqi Bu</a>, <a href="/search/cs?searchtype=author&query=Wu%2C+Z+S">Zhiwei Steven Wu</a>, <a href="/search/cs?searchtype=author&query=Hong%2C+M">Mingyi Hong</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2311.14632v2-abstract-short" style="display: inline;"> Differentially Private Stochastic Gradient Descent with Gradient Clipping (DPSGD-GC) is a powerful tool for training deep learning models using sensitive data, providing both a solid theoretical privacy guarantee and high efficiency. However, using DPSGD-GC to ensure Differential Privacy (DP) comes at the cost of model performance degradation due to DP noise injection and gradient clipping. Existi… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2311.14632v2-abstract-full').style.display = 'inline'; document.getElementById('2311.14632v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2311.14632v2-abstract-full" style="display: none;"> Differentially Private Stochastic Gradient Descent with Gradient Clipping (DPSGD-GC) is a powerful tool for training deep learning models using sensitive data, providing both a solid theoretical privacy guarantee and high efficiency. However, using DPSGD-GC to ensure Differential Privacy (DP) comes at the cost of model performance degradation due to DP noise injection and gradient clipping. 
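For readers unfamiliar with the DPSGD-GC baseline discussed in this abstract, the standard step is: clip each per-example gradient to norm C, average, add Gaussian noise, and take a descent step. The sketch below illustrates that baseline only, not the error-feedback algorithm the paper proposes; the noise multiplier, learning rate, and stand-in gradients are arbitrary.

```python
# Illustrative single step of DP-SGD with gradient clipping (DPSGD-GC).
import numpy as np

def dpsgd_gc_step(params, per_example_grads, clip_norm, noise_multiplier, lr, rng):
    clipped = []
    for g in per_example_grads:
        scale = min(1.0, clip_norm / (np.linalg.norm(g) + 1e-12))  # clip to norm C
        clipped.append(g * scale)
    mean_grad = np.mean(clipped, axis=0)
    # Gaussian noise calibrated to the clipping norm and batch size (toy values).
    noise = rng.normal(scale=noise_multiplier * clip_norm / len(clipped), size=params.shape)
    return params - lr * (mean_grad + noise)

rng = np.random.default_rng(0)
params = np.zeros(4)
grads = [rng.normal(size=4) * 3.0 for _ in range(8)]  # stand-in per-example gradients
params = dpsgd_gc_step(params, grads, clip_norm=1.0, noise_multiplier=1.1, lr=0.1, rng=rng)
print(params)
```

The constant bias the abstract refers to comes from the clipping step: whenever the true per-example gradients exceed the threshold, the averaged clipped gradient no longer points along the true descent direction.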
Existing research has extensively analyzed the theoretical convergence of DPSGD-GC, and has shown that it only converges when using large clipping thresholds that are dependent on problem-specific parameters. Unfortunately, these parameters are often unknown in practice, making it hard to choose the optimal clipping threshold. Therefore, in practice, DPSGD-GC suffers from degraded performance due to the {\it constant} bias introduced by the clipping. In our work, we propose a new error-feedback (EF) DP algorithm as an alternative to DPSGD-GC, which not only offers a diminishing utility bound without inducing a constant clipping bias, but more importantly, it allows for an arbitrary choice of clipping threshold that is independent of the problem. We establish an algorithm-specific DP analysis for our proposed algorithm, providing privacy guarantees based on Rényi DP. Additionally, we demonstrate that under mild conditions, our algorithm can achieve nearly the same utility bound as DPSGD without gradient clipping. Our empirical results on Cifar-10/100 and E2E datasets show that the proposed algorithm achieves higher accuracies than DPSGD while maintaining the same level of DP guarantee. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2311.14632v2-abstract-full').style.display = 'none'; document.getElementById('2311.14632v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 17 April, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 24 November, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> November 2023. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2309.00711">arXiv:2309.00711</a> <span> [<a href="https://arxiv.org/pdf/2309.00711">pdf</a>, <a href="https://arxiv.org/format/2309.00711">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> Learning Shared Safety Constraints from Multi-task Demonstrations </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Kim%2C+K">Konwoo Kim</a>, <a href="/search/cs?searchtype=author&query=Swamy%2C+G">Gokul Swamy</a>, <a href="/search/cs?searchtype=author&query=Liu%2C+Z">Zuxin Liu</a>, <a href="/search/cs?searchtype=author&query=Zhao%2C+D">Ding Zhao</a>, <a href="/search/cs?searchtype=author&query=Choudhury%2C+S">Sanjiban Choudhury</a>, <a href="/search/cs?searchtype=author&query=Wu%2C+Z+S">Zhiwei Steven Wu</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2309.00711v1-abstract-short" style="display: inline;"> Regardless of the particular task we want them to perform in an environment, there are often shared safety constraints we want our agents to respect. For example, regardless of whether it is making a sandwich or clearing the table, a kitchen robot should not break a plate. Manually specifying such a constraint can be both time-consuming and error-prone.
We show how to learn constraints from expert… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2309.00711v1-abstract-full').style.display = 'inline'; document.getElementById('2309.00711v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2309.00711v1-abstract-full" style="display: none;"> Regardless of the particular task we want them to perform in an environment, there are often shared safety constraints we want our agents to respect. For example, regardless of whether it is making a sandwich or clearing the table, a kitchen robot should not break a plate. Manually specifying such a constraint can be both time-consuming and error-prone. We show how to learn constraints from expert demonstrations of safe task completion by extending inverse reinforcement learning (IRL) techniques to the space of constraints. Intuitively, we learn constraints that forbid highly rewarding behavior that the expert could have taken but chose not to. Unfortunately, the constraint learning problem is rather ill-posed and typically leads to overly conservative constraints that forbid all behavior that the expert did not take. We counter this by leveraging diverse demonstrations that naturally occur in multi-task settings to learn a tighter set of constraints. We validate our method with simulation experiments on high-dimensional continuous control tasks. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2309.00711v1-abstract-full').style.display = 'none'; document.getElementById('2309.00711v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 1 September, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> September 2023. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2307.07539">arXiv:2307.07539</a> <span> [<a href="https://arxiv.org/pdf/2307.07539">pdf</a>, <a href="https://arxiv.org/ps/2307.07539">ps</a>, <a href="https://arxiv.org/format/2307.07539">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Statistics Theory">math.ST</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">stat.ML</span> </div> </div> <p class="title is-5 mathjax"> On the Sublinear Regret of GP-UCB </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Whitehouse%2C+J">Justin Whitehouse</a>, <a href="/search/cs?searchtype=author&query=Wu%2C+Z+S">Zhiwei Steven Wu</a>, <a href="/search/cs?searchtype=author&query=Ramdas%2C+A">Aaditya Ramdas</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2307.07539v2-abstract-short" style="display: inline;"> In the kernelized bandit problem, a learner aims to sequentially compute the optimum of a function lying in a reproducing kernel Hilbert space given only noisy evaluations at sequentially chosen points. In particular, the learner aims to minimize regret, which is a measure of the suboptimality of the choices made. 
Arguably the most popular algorithm is the Gaussian Process Upper Confidence Bound (… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2307.07539v2-abstract-full').style.display = 'inline'; document.getElementById('2307.07539v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2307.07539v2-abstract-full" style="display: none;"> In the kernelized bandit problem, a learner aims to sequentially compute the optimum of a function lying in a reproducing kernel Hilbert space given only noisy evaluations at sequentially chosen points. In particular, the learner aims to minimize regret, which is a measure of the suboptimality of the choices made. Arguably the most popular algorithm is the Gaussian Process Upper Confidence Bound (GP-UCB) algorithm, which involves acting based on a simple linear estimator of the unknown function. Despite its popularity, existing analyses of GP-UCB give a suboptimal regret rate, which fails to be sublinear for many commonly used kernels such as the Matérn kernel. This has led to a longstanding open question: are existing regret analyses for GP-UCB tight, or can bounds be improved by using more sophisticated analytical techniques? In this work, we resolve this open question and show that GP-UCB enjoys nearly optimal regret. In particular, our results yield sublinear regret rates for the Matérn kernel, improving over the state-of-the-art analyses and partially resolving a COLT open problem posed by Vakili et al. Our improvements rely on a key technical contribution -- regularizing kernel ridge estimators in proportion to the smoothness of the underlying kernel $k$. Applying this key idea together with a largely overlooked concentration result in separable Hilbert spaces (for which we provide an independent, simplified derivation), we are able to provide a tighter analysis of the GP-UCB algorithm. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2307.07539v2-abstract-full').style.display = 'none'; document.getElementById('2307.07539v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 14 August, 2023; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 14 July, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> July 2023.
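As a point of reference for the GP-UCB algorithm analyzed in the abstract above, the sketch below runs a generic GP-UCB loop over a finite candidate set: fit a regularized kernel ridge (GP posterior) estimator to the observations so far, then query the point maximizing the posterior mean plus a confidence width. The RBF kernel, the fixed confidence parameter beta, and the regularization level are illustrative choices, not the smoothness-dependent constants from the paper's analysis.

```python
# Generic GP-UCB loop over a finite candidate set (illustrative constants).
import numpy as np

def rbf_kernel(A, B, lengthscale=0.2):
    d2 = np.sum(A**2, axis=1)[:, None] + np.sum(B**2, axis=1)[None, :] - 2.0 * A @ B.T
    return np.exp(-0.5 * d2 / lengthscale**2)

def gp_ucb(f, candidates, rounds=30, noise=0.1, reg=1.0, beta=2.0, seed=0):
    rng = np.random.default_rng(seed)
    X, y = [], []
    for _ in range(rounds):
        if not X:
            idx = int(rng.integers(len(candidates)))
        else:
            Xa, ya = np.array(X), np.array(y)
            K = rbf_kernel(Xa, Xa) + reg * np.eye(len(Xa))   # regularized kernel matrix
            Ks = rbf_kernel(np.array(candidates), Xa)
            mean = Ks @ np.linalg.solve(K, ya)               # kernel ridge / posterior mean
            var = 1.0 - np.sum(Ks * np.linalg.solve(K, Ks.T).T, axis=1)
            idx = int(np.argmax(mean + beta * np.sqrt(np.maximum(var, 0.0))))
        x = candidates[idx]
        X.append(x)
        y.append(f(x) + noise * rng.normal())                # noisy evaluation
    return X[int(np.argmax(y))]                              # best observed point

candidates = [np.array([v]) for v in np.linspace(0.0, 1.0, 50)]
print(gp_ucb(lambda x: float(np.sin(6.0 * x[0])), candidates))
```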
</p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">20 pages, 0 figures</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2307.04249">arXiv:2307.04249</a> <span> [<a href="https://arxiv.org/pdf/2307.04249">pdf</a>, <a href="https://arxiv.org/format/2307.04249">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Data Structures and Algorithms">cs.DS</span> </div> </div> <p class="title is-5 mathjax"> Private Data Stream Analysis for Universal Symmetric Norm Estimation </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Braverman%2C+V">Vladimir Braverman</a>, <a href="/search/cs?searchtype=author&query=Manning%2C+J">Joel Manning</a>, <a href="/search/cs?searchtype=author&query=Wu%2C+Z+S">Zhiwei Steven Wu</a>, <a href="/search/cs?searchtype=author&query=Zhou%2C+S">Samson Zhou</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2307.04249v1-abstract-short" style="display: inline;"> We study how to release summary statistics on a data stream subject to the constraint of differential privacy. In particular, we focus on releasing the family of symmetric norms, which are invariant under sign-flips and coordinate-wise permutations on an input data stream and include $L_p$ norms, $k$-support norms, top-$k$ norms, and the box norm as special cases. Although it may be possible to de… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2307.04249v1-abstract-full').style.display = 'inline'; document.getElementById('2307.04249v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2307.04249v1-abstract-full" style="display: none;"> We study how to release summary statistics on a data stream subject to the constraint of differential privacy. In particular, we focus on releasing the family of symmetric norms, which are invariant under sign-flips and coordinate-wise permutations on an input data stream and include $L_p$ norms, $k$-support norms, top-$k$ norms, and the box norm as special cases. Although it may be possible to design and analyze a separate mechanism for each symmetric norm, we propose a general parametrizable framework that differentially privately releases a number of sufficient statistics from which the approximation of all symmetric norms can be simultaneously computed. Our framework partitions the coordinates of the underlying frequency vector into different levels based on their magnitude and releases approximate frequencies for the "heavy" coordinates in important levels and releases approximate level sizes for the "light" coordinates in important levels. Surprisingly, our mechanism allows for the release of an arbitrary number of symmetric norm approximations without any overhead or additional loss in privacy. Moreover, our mechanism permits $(1+伪)$-approximation to each of the symmetric norms and can be implemented using sublinear space in the streaming model for many regimes of the accuracy and privacy parameters. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2307.04249v1-abstract-full').style.display = 'none'; document.getElementById('2307.04249v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 9 July, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> July 2023. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2307.03694">arXiv:2307.03694</a> <span> [<a href="https://arxiv.org/pdf/2307.03694">pdf</a>, <a href="https://arxiv.org/format/2307.03694">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Cryptography and Security">cs.CR</span> </div> </div> <p class="title is-5 mathjax"> Scalable Membership Inference Attacks via Quantile Regression </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Bertran%2C+M">Martin Bertran</a>, <a href="/search/cs?searchtype=author&query=Tang%2C+S">Shuai Tang</a>, <a href="/search/cs?searchtype=author&query=Kearns%2C+M">Michael Kearns</a>, <a href="/search/cs?searchtype=author&query=Morgenstern%2C+J">Jamie Morgenstern</a>, <a href="/search/cs?searchtype=author&query=Roth%2C+A">Aaron Roth</a>, <a href="/search/cs?searchtype=author&query=Wu%2C+Z+S">Zhiwei Steven Wu</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2307.03694v1-abstract-short" style="display: inline;"> Membership inference attacks are designed to determine, using black box access to trained models, whether a particular example was used in training or not. Membership inference can be formalized as a hypothesis testing problem. The most effective existing attacks estimate the distribution of some test statistic (usually the model's confidence on the true label) on points that were (and were not) u… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2307.03694v1-abstract-full').style.display = 'inline'; document.getElementById('2307.03694v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2307.03694v1-abstract-full" style="display: none;"> Membership inference attacks are designed to determine, using black box access to trained models, whether a particular example was used in training or not. Membership inference can be formalized as a hypothesis testing problem. The most effective existing attacks estimate the distribution of some test statistic (usually the model's confidence on the true label) on points that were (and were not) used in training by training many \emph{shadow models} -- i.e. models of the same architecture as the model being attacked, trained on a random subsample of data. While effective, these attacks are extremely computationally expensive, especially when the model under attack is large. 
<li class="arxiv-result">
  <p class="list-title"><a href="https://arxiv.org/abs/2307.02295">arXiv:2307.02295</a> [<a href="https://arxiv.org/pdf/2307.02295">pdf</a>, <a href="https://arxiv.org/format/2307.02295">other</a>]</p>
  <p class="tags">cs.LG (Machine Learning); cs.AI (Artificial Intelligence); stat.ML (Machine Learning)</p>
  <p class="title is-5 mathjax">Meta-Learning Adversarial Bandit Algorithms</p>
  <p class="authors">Authors: Mikhail Khodak, Ilya Osadchiy, Keegan Harris, Maria-Florina Balcan, Kfir Y. Levy, Ron Meir, Zhiwei Steven Wu</p>
  <p class="abstract mathjax">Abstract: We study online meta-learning with bandit feedback, with the goal of improving performance across multiple tasks if they are similar according to some natural similarity measure. As the first to target the adversarial online-within-online partial-information setting, we design meta-algorithms that combine outer learners to simultaneously tune the initialization and other hyperparameters of an inner learner for two important cases: multi-armed bandits (MAB) and bandit linear optimization (BLO). For MAB, the meta-learners initialize and set hyperparameters of the Tsallis-entropy generalization of Exp3, with the task-averaged regret improving if the entropy of the optima-in-hindsight is small. For BLO, we learn to initialize and tune online mirror descent (OMD) with self-concordant barrier regularizers, showing that task-averaged regret varies directly with an action space-dependent measure they induce. Our guarantees rely on proving that unregularized follow-the-leader combined with two levels of low-dimensional hyperparameter tuning is enough to learn a sequence of affine functions of non-Lipschitz and sometimes non-convex Bregman divergences bounding the regret of OMD.</p>
  <p class="is-size-7">Submitted 1 November, 2023; v1 submitted 5 July, 2023; originally announced July 2023.</p>
  <p class="comments is-size-7">Comments: Merger of arXiv:2205.14128 and arXiv:2205.15921, with some additional improvements; to appear in NeurIPS 2023</p>
</li>
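<p class="is-size-7">A minimal sketch of the "tune the initialization across tasks" idea for the MAB case, using vanilla Exp3 rather than the Tsallis-entropy variant the paper analyzes; the warm-start averaging rule and the Bernoulli reward model are illustrative heuristics, not the paper's meta-algorithm.</p>
<pre><code class="language-python">
import numpy as np

rng = np.random.default_rng(2)

def exp3(reward_fn, n_arms, horizon, init_probs, gamma=0.1):
    """Vanilla Exp3; init_probs warm-starts the weights (uniform = cold start)."""
    weights = np.asarray(init_probs, dtype=float) * n_arms
    for _ in range(horizon):
        probs = (1 - gamma) * weights / weights.sum() + gamma / n_arms
        arm = rng.choice(n_arms, p=probs)
        reward = reward_fn(arm)                        # bandit feedback only
        weights[arm] *= np.exp(gamma * reward / (probs[arm] * n_arms))
    return weights / weights.sum()

# Meta-loop over similar tasks: warm-start each task from a running average of
# the play distributions learned on earlier tasks.
n_arms, best_arm_per_task = 10, [3, 3, 4, 3, 2]
init = np.full(n_arms, 1.0 / n_arms)
for best in best_arm_per_task:
    reward_fn = lambda a, b=best: float(rng.random() < (0.9 if a == b else 0.4))
    final = exp3(reward_fn, n_arms, horizon=2000, init_probs=init)
    init = 0.5 * init + 0.5 * final                    # simple meta-update
    print(np.round(init, 2))
</code></pre>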
</p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Merger of arXiv:2205.14128 and arXiv:2205.15921, with some additional improvements; to appear in NeurIPS 2023</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2307.01357">arXiv:2307.01357</a> <span> [<a href="https://arxiv.org/pdf/2307.01357">pdf</a>, <a href="https://arxiv.org/format/2307.01357">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Econometrics">econ.EM</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Methodology">stat.ME</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">stat.ML</span> </div> </div> <p class="title is-5 mathjax"> Adaptive Principal Component Regression with Applications to Panel Data </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Agarwal%2C+A">Anish Agarwal</a>, <a href="/search/cs?searchtype=author&query=Harris%2C+K">Keegan Harris</a>, <a href="/search/cs?searchtype=author&query=Whitehouse%2C+J">Justin Whitehouse</a>, <a href="/search/cs?searchtype=author&query=Wu%2C+Z+S">Zhiwei Steven Wu</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2307.01357v3-abstract-short" style="display: inline;"> Principal component regression (PCR) is a popular technique for fixed-design error-in-variables regression, a generalization of the linear regression setting in which the observed covariates are corrupted with random noise. We provide the first time-uniform finite sample guarantees for (regularized) PCR whenever data is collected adaptively. Since the proof techniques for analyzing PCR in the fixe… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2307.01357v3-abstract-full').style.display = 'inline'; document.getElementById('2307.01357v3-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2307.01357v3-abstract-full" style="display: none;"> Principal component regression (PCR) is a popular technique for fixed-design error-in-variables regression, a generalization of the linear regression setting in which the observed covariates are corrupted with random noise. We provide the first time-uniform finite sample guarantees for (regularized) PCR whenever data is collected adaptively. Since the proof techniques for analyzing PCR in the fixed design setting do not readily extend to the online setting, our results rely on adapting tools from modern martingale concentration to the error-in-variables setting. We demonstrate the usefulness of our bounds by applying them to the domain of panel data, a ubiquitous setting in econometrics and statistics. As our first application, we provide a framework for experiment design in panel data settings when interventions are assigned adaptively. Our framework may be thought of as a generalization of the synthetic control and synthetic interventions frameworks, where data is collected via an adaptive intervention assignment policy. 
<li class="arxiv-result">
  <p class="list-title"><a href="https://arxiv.org/abs/2306.13824">arXiv:2306.13824</a> [<a href="https://arxiv.org/pdf/2306.13824">pdf</a>, <a href="https://arxiv.org/format/2306.13824">other</a>]</p>
  <p class="tags">cs.CR (Cryptography and Security); cs.DS (Data Structures and Algorithms); cs.LG (Machine Learning)</p>
  <p class="title is-5 mathjax">Adaptive Privacy Composition for Accuracy-first Mechanisms</p>
  <p class="authors">Authors: Ryan Rogers, Gennady Samorodnitsky, Zhiwei Steven Wu, Aaditya Ramdas</p>
  <p class="abstract mathjax">Abstract: In many practical applications of differential privacy, practitioners seek to provide the best privacy guarantees subject to a target level of accuracy. A recent line of work by Ligett et al. '17 and Whitehouse et al. '22 has developed such accuracy-first mechanisms by leveraging the idea of noise reduction, which adds correlated noise to the sufficient statistic in a private computation and produces a sequence of increasingly accurate answers. A major advantage of noise reduction mechanisms is that the analysts only pay the privacy cost of the least noisy or most accurate answer released. Despite this appealing property in isolation, there has not been a systematic study on how to use them in conjunction with other differentially private mechanisms. A fundamental challenge is that the privacy guarantee for noise reduction mechanisms is (necessarily) formulated as ex-post privacy, which bounds the privacy loss as a function of the released outcome. Furthermore, there has yet to be any study on how ex-post private mechanisms compose, which would allow us to track the accumulated privacy over several mechanisms. We develop privacy filters [Rogers et al. '16, Feldman and Zrnic '21, and Whitehouse et al. '22] that allow an analyst to adaptively switch between differentially private and ex-post private mechanisms subject to an overall differential privacy guarantee.</p>
  <p class="is-size-7">Submitted 5 December, 2023; v1 submitted 23 June, 2023; originally announced June 2023.</p>
</li>
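<p class="is-size-7">A minimal sketch of the kind of privacy filter the paper generalizes: an analyst adaptively spends privacy budget, including ex-post losses reported only after a noise-reduction mechanism has run, and the filter refuses any spend that would exceed the global budget. The simple additive accounting below (and checking the ex-post cost only after the fact) glosses over exactly the composition subtleties the paper resolves.</p>
<pre><code class="language-python">
class PrivacyFilter:
    """Naive additive filter for pure epsilon-DP privacy losses."""

    def __init__(self, epsilon_budget):
        self.budget = epsilon_budget
        self.spent = 0.0

    def try_spend(self, epsilon):
        """Charge a privacy loss; refuse it if the total budget would be exceeded."""
        if self.spent + epsilon > self.budget:
            return False
        self.spent += epsilon
        return True

filt = PrivacyFilter(epsilon_budget=1.0)
print(filt.try_spend(0.3))   # ordinary DP mechanism: cost declared up front
ex_post_cost = 0.5           # e.g. reported by a noise-reduction mechanism after release
print(filt.try_spend(ex_post_cost))
print(filt.try_spend(0.4))   # refused: 0.3 + 0.5 + 0.4 exceeds the budget of 1.0
</code></pre>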
<li class="arxiv-result">
  <p class="list-title"><a href="https://arxiv.org/abs/2306.06250">arXiv:2306.06250</a> [<a href="https://arxiv.org/pdf/2306.06250">pdf</a>, <a href="https://arxiv.org/format/2306.06250">other</a>]</p>
  <p class="tags">cs.GT (Computer Science and Game Theory); cs.LG (Machine Learning)</p>
  <p class="title is-5 mathjax">Strategic Apple Tasting</p>
  <p class="authors">Authors: Keegan Harris, Chara Podimata, Zhiwei Steven Wu</p>
  <p class="abstract mathjax">Abstract: Algorithmic decision-making in high-stakes domains often involves assigning decisions to agents with incentives to strategically modify their input to the algorithm. In addition to dealing with incentives, in many domains of interest (e.g. lending and hiring) the decision-maker only observes feedback regarding their policy for rounds in which they assign a positive decision to the agent; this type of feedback is often referred to as apple tasting (or one-sided) feedback. We formalize this setting as an online learning problem with apple-tasting feedback in which a principal makes decisions about a sequence of $T$ agents, each of which is represented by a context that may be strategically modified. Our goal is to achieve sublinear strategic regret, which compares the performance of the principal to that of the best fixed policy in hindsight, if the agents were truthful when revealing their contexts. Our main result is a learning algorithm which incurs $O(\sqrt{T})$ strategic regret when the sequence of agents is chosen stochastically. We also give an algorithm capable of handling adversarially-chosen agents, albeit at the cost of $O(T^{(d+1)/(d+2)})$ strategic regret (where $d$ is the dimension of the context). Our algorithms can be easily adapted to the setting where the principal receives bandit feedback -- this setting generalizes both the linear contextual bandit problem (by considering agents with incentives) and the strategic classification problem (by allowing for partial feedback).</p>
  <p class="is-size-7">Submitted 27 October, 2023; v1 submitted 9 June, 2023; originally announced June 2023.</p>
  <p class="comments is-size-7">Comments: In the thirty-seventh Conference on Neural Information Processing Systems (NeurIPS 2023)</p>
</li>
<li class="arxiv-result">
  <p class="list-title"><a href="https://arxiv.org/abs/2306.03257">arXiv:2306.03257</a> [<a href="https://arxiv.org/pdf/2306.03257">pdf</a>, <a href="https://arxiv.org/format/2306.03257">other</a>]</p>
  <p class="tags">cs.NE (Neural and Evolutionary Computing); cs.CR (Cryptography and Security); cs.LG (Machine Learning)</p>
  <p class="title is-5 mathjax">Generating Private Synthetic Data with Genetic Algorithms</p>
  <p class="authors">Authors: Terrance Liu, Jingwu Tang, Giuseppe Vietri, Zhiwei Steven Wu</p>
  <p class="abstract mathjax">Abstract: We study the problem of efficiently generating differentially private synthetic data that approximate the statistical properties of an underlying sensitive dataset. In recent years, there has been a growing line of work that approaches this problem using first-order optimization techniques. However, such techniques are restricted to optimizing differentiable objectives only, severely limiting the types of analyses that can be conducted. For example, first-order mechanisms have been primarily successful in approximating statistical queries only in the form of marginals for discrete data domains. In some cases, one can circumvent such issues by relaxing the task's objective to maintain differentiability. However, even when possible, these approaches impose a fundamental limitation in which modifications to the minimization problem become additional sources of error. Therefore, we propose Private-GSD, a private genetic algorithm based on zeroth-order optimization heuristics that do not require modifying the original objective. As a result, it avoids the aforementioned limitations of first-order optimization. We empirically evaluate Private-GSD against baseline algorithms on data derived from the American Community Survey across a variety of statistics -- otherwise known as statistical queries -- for both discrete and real-valued attributes. We show that Private-GSD outperforms the state-of-the-art methods on non-differentiable queries while matching accuracy in approximating differentiable ones.</p>
  <p class="is-size-7">Submitted 5 June, 2023; originally announced June 2023.</p>
</li>
<li class="arxiv-result">
  <p class="list-title"><a href="https://arxiv.org/abs/2303.14623">arXiv:2303.14623</a> [<a href="https://arxiv.org/pdf/2303.14623">pdf</a>, <a href="https://arxiv.org/format/2303.14623">other</a>]</p>
  <p class="tags">cs.LG (Machine Learning)</p>
  <p class="title is-5 mathjax">Inverse Reinforcement Learning without Reinforcement Learning</p>
  <p class="authors">Authors: Gokul Swamy, Sanjiban Choudhury, J. Andrew Bagnell, Zhiwei Steven Wu</p>
  <p class="abstract mathjax">Abstract: Inverse Reinforcement Learning (IRL) is a powerful set of techniques for imitation learning that aims to learn a reward function that rationalizes expert demonstrations. Unfortunately, traditional IRL methods suffer from a computational weakness: they require repeatedly solving a hard reinforcement learning (RL) problem as a subroutine. This is counter-intuitive from the viewpoint of reductions: we have reduced the easier problem of imitation learning to repeatedly solving the harder problem of RL. Another thread of work has proved that access to the side-information of the distribution of states where a strong policy spends time can dramatically reduce the sample and computational complexities of solving an RL problem. In this work, we demonstrate for the first time a more informed imitation learning reduction where we utilize the state distribution of the expert to alleviate the global exploration component of the RL subroutine, providing an exponential speedup in theory. In practice, we find that we are able to significantly speed up the prior art on continuous control tasks.</p>
  <p class="is-size-7">Submitted 29 January, 2024; v1 submitted 26 March, 2023; originally announced March 2023.</p>
</li>
<li class="arxiv-result">
  <p class="list-title"><a href="https://arxiv.org/abs/2303.03451">arXiv:2303.03451</a> [<a href="https://arxiv.org/pdf/2303.03451">pdf</a>, <a href="https://arxiv.org/format/2303.03451">other</a>]</p>
  <p class="tags">cs.LG (Machine Learning); cs.CR (Cryptography and Security)</p>
  <p class="title is-5 mathjax">Improved Differentially Private Regression via Gradient Boosting</p>
  <p class="authors">Authors: Shuai Tang, Sergul Aydore, Michael Kearns, Saeyoung Rho, Aaron Roth, Yichen Wang, Yu-Xiang Wang, Zhiwei Steven Wu</p>
  <p class="abstract mathjax">Abstract: We revisit the problem of differentially private squared error linear regression. We observe that existing state-of-the-art methods are sensitive to the choice of hyperparameters -- including the "clipping threshold" that cannot be set optimally in a data-independent way. We give a new algorithm for private linear regression based on gradient boosting. We show that our method consistently improves over the previous state of the art when the clipping threshold is taken to be fixed without knowledge of the data, rather than optimized in a non-private way -- and that even when we optimize the hyperparameters of competitor algorithms non-privately, our algorithm is no worse and often better. In addition to a comprehensive set of experiments, we give theoretical insights to explain this behavior.</p>
  <p class="is-size-7">Submitted 20 May, 2023; v1 submitted 6 March, 2023; originally announced March 2023.</p>
</li>
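<p class="is-size-7">A minimal sketch of the "boosting over differentially private base learners" idea: each round fits the current residuals with a clipped-gradient, Gaussian-noised least-squares step and adds a shrunken copy to the ensemble. The clipping threshold, noise multiplier, step counts, and the absence of an overall $(\varepsilon,\delta)$ accounting are all simplifications; this is not the paper's tuned mechanism.</p>
<pre><code class="language-python">
import numpy as np

rng = np.random.default_rng(5)

def dp_least_squares_step(X, residual, clip=1.0, noise_mult=1.0, steps=20, lr=0.1):
    """Rough DP base learner: full-batch gradient descent on squared error with
    per-example gradient clipping and Gaussian noise (DP-SGD style)."""
    n, d = X.shape
    beta = np.zeros(d)
    for _ in range(steps):
        per_example = (X @ beta - residual)[:, None] * X          # n x d gradients
        norms = np.linalg.norm(per_example, axis=1, keepdims=True)
        per_example = per_example * np.minimum(1.0, clip / np.maximum(norms, 1e-12))
        noisy_grad = (per_example.sum(axis=0)
                      + rng.normal(scale=noise_mult * clip, size=d)) / n
        beta -= lr * noisy_grad
    return beta

def dp_boosted_regression(X, y, rounds=5, shrinkage=0.5):
    """Gradient boosting: each round privately fits the current residuals."""
    ensemble, pred = [], np.zeros(len(y))
    for _ in range(rounds):
        beta_t = dp_least_squares_step(X, y - pred)
        ensemble.append(shrinkage * beta_t)
        pred = X @ sum(ensemble)
    return sum(ensemble)

X = rng.normal(size=(1000, 5))
y = X @ np.array([1.0, -2.0, 0.5, 0.0, 3.0]) + 0.1 * rng.normal(size=1000)
print(np.round(dp_boosted_regression(X, y), 2))
</code></pre>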
<li class="arxiv-result">
  <p class="list-title"><a href="https://arxiv.org/abs/2303.01256">arXiv:2303.01256</a> [<a href="https://arxiv.org/pdf/2303.01256">pdf</a>, <a href="https://arxiv.org/format/2303.01256">other</a>]</p>
  <p class="tags">stat.ML (Machine Learning); cs.CR (Cryptography and Security); cs.CV (Computer Vision and Pattern Recognition); cs.DS (Data Structures and Algorithms); cs.LG (Machine Learning)</p>
  <p class="title is-5 mathjax">Choosing Public Datasets for Private Machine Learning via Gradient Subspace Distance</p>
  <p class="authors">Authors: Xin Gu, Gautam Kamath, Zhiwei Steven Wu</p>
  <p class="abstract mathjax">Abstract: Differentially private stochastic gradient descent privatizes model training by injecting noise into each iteration, where the noise magnitude increases with the number of model parameters. Recent works suggest that we can reduce the noise by leveraging public data for private machine learning, by projecting gradients onto a subspace prescribed by the public data. However, given a choice of public datasets, it is not a priori clear which one may be most appropriate for the private task. We give an algorithm for selecting a public dataset by measuring a low-dimensional subspace distance between gradients of the public and private examples. We provide theoretical analysis demonstrating that the excess risk scales with this subspace distance. This distance is easy to compute and robust to modifications in the setting. Empirical evaluation shows that trained model accuracy is monotone in this distance.</p>
  <p class="is-size-7">Submitted 2 March, 2023; originally announced March 2023.</p>
</li>
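<p class="is-size-7">A minimal sketch of the selection rule: compute the top-$k$ principal subspace of per-example gradients on the private data and on each candidate public dataset, then rank candidates by a subspace distance (here, the sine of the largest principal angle). The linear squared-loss gradient model and the choice $k=5$ are illustrative assumptions.</p>
<pre><code class="language-python">
import numpy as np

def top_k_gradient_subspace(X, y, w, k):
    """Top-k right singular subspace of per-example gradients of a squared-loss
    linear model (a stand-in for the per-example gradients of the real model)."""
    grads = (X @ w - y)[:, None] * X
    _, _, Vt = np.linalg.svd(grads, full_matrices=False)
    return Vt[:k].T                                   # d x k orthonormal basis

def subspace_distance(U, V):
    """Sine of the largest principal angle between two k-dimensional subspaces."""
    s = np.linalg.svd(U.T @ V, compute_uv=False)
    return float(np.sqrt(max(0.0, 1.0 - s.min() ** 2)))

rng = np.random.default_rng(6)
d, k, w = 20, 5, np.zeros(20)
Xpriv, ypriv = rng.normal(size=(500, d)), rng.normal(size=500)
candidates = {"public_A": (Xpriv + 0.1 * rng.normal(size=(500, d)), ypriv),
              "public_B": (rng.normal(size=(500, d)), rng.normal(size=500))}
priv_basis = top_k_gradient_subspace(Xpriv, ypriv, w, k)
for name, (Xp, yp) in candidates.items():
    dist = subspace_distance(priv_basis, top_k_gradient_subspace(Xp, yp, w, k))
    print(name, round(dist, 3))   # prefer the public dataset with the smallest distance
</code></pre>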
<li class="arxiv-result">
  <p class="list-title"><a href="https://arxiv.org/abs/2302.11121">arXiv:2302.11121</a> [<a href="https://arxiv.org/pdf/2302.11121">pdf</a>, <a href="https://arxiv.org/format/2302.11121">other</a>]</p>
  <p class="tags">cs.LG (Machine Learning); cs.CY (Computers and Society); cs.HC (Human-Computer Interaction); stat.ME (Methodology)</p>
  <p class="doi is-size-7">DOI: <a href="https://doi.org/10.1145/3593013.3594101">10.1145/3593013.3594101</a></p>
  <p class="title is-5 mathjax">Counterfactual Prediction Under Outcome Measurement Error</p>
  <p class="authors">Authors: Luke Guerdan, Amanda Coston, Kenneth Holstein, Zhiwei Steven Wu</p>
  <p class="abstract mathjax">Abstract: Across domains such as medicine, employment, and criminal justice, predictive models often target labels that imperfectly reflect the outcomes of interest to experts and policymakers. For example, clinical risk assessments deployed to inform physician decision-making often predict measures of healthcare utilization (e.g., costs, hospitalization) as a proxy for patient medical need. These proxies can be subject to outcome measurement error when they systematically differ from the target outcome they are intended to measure. However, prior modeling efforts to characterize and mitigate outcome measurement error overlook the fact that the decision being informed by a model often serves as a risk-mitigating intervention that impacts the target outcome of interest and its recorded proxy. Thus, in these settings, addressing measurement error requires counterfactual modeling of treatment effects on outcomes. In this work, we study intersectional threats to model reliability introduced by outcome measurement error, treatment effects, and selection bias from historical decision-making policies. We develop an unbiased risk minimization method which, given knowledge of proxy measurement error properties, corrects for the combined effects of these challenges. We also develop a method for estimating treatment-dependent measurement error parameters when these are unknown in advance. We demonstrate the utility of our approach theoretically and via experiments on real-world data from randomized controlled trials conducted in healthcare and employment domains. As importantly, we demonstrate that models correcting for outcome measurement error or treatment effects alone suffer from considerable reliability limitations. Our work underscores the importance of considering intersectional threats to model validity during the design and evaluation of predictive models for decision support.</p>
  <p class="is-size-7">Submitted 17 May, 2023; v1 submitted 21 February, 2023; originally announced February 2023.</p>
  <p class="comments is-size-7">Comments: FAccT 2023</p>
</li>
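<p class="is-size-7">A minimal sketch of one ingredient of such an approach: a backward-style correction that makes a log loss unbiased for the clean target outcome given known proxy error rates. In the setting above those error rates may additionally be treatment-dependent, and the full method also handles treatment effects and selection bias, which this sketch does not attempt; the error rates and probabilities below are illustrative values.</p>
<pre><code class="language-python">
import numpy as np

def corrected_loss(pred_prob, proxy_label, alpha, beta):
    """Unbiased ("backward-corrected") log loss against a noisy proxy label.
    alpha = P(proxy=1 | target=0), beta = P(proxy=0 | target=1); both are
    assumed known (in the paper they may depend on the treatment assigned)."""
    eps = 1e-12
    loss_pos = -np.log(pred_prob + eps)        # loss if the true outcome were 1
    loss_neg = -np.log(1 - pred_prob + eps)    # loss if the true outcome were 0
    denom = 1.0 - alpha - beta
    corrected_pos = ((1 - alpha) * loss_pos - beta * loss_neg) / denom
    corrected_neg = ((1 - beta) * loss_neg - alpha * loss_pos) / denom
    return np.where(proxy_label == 1, corrected_pos, corrected_neg)

# in expectation over the proxy noise, this equals the clean log loss
probs = np.array([0.9, 0.2, 0.7])
proxies = np.array([1, 0, 1])
print(corrected_loss(probs, proxies, alpha=0.1, beta=0.2))
</code></pre>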
</p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">FAccT 2023</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2302.08533">arXiv:2302.08533</a> <span> [<a href="https://arxiv.org/pdf/2302.08533">pdf</a>, <a href="https://arxiv.org/format/2302.08533">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Distributed, Parallel, and Cluster Computing">cs.DC</span> </div> </div> <p class="title is-5 mathjax"> Federated Learning as a Network Effects Game </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Hu%2C+S">Shengyuan Hu</a>, <a href="/search/cs?searchtype=author&query=Ngo%2C+D+D">Dung Daniel Ngo</a>, <a href="/search/cs?searchtype=author&query=Zheng%2C+S">Shuran Zheng</a>, <a href="/search/cs?searchtype=author&query=Smith%2C+V">Virginia Smith</a>, <a href="/search/cs?searchtype=author&query=Wu%2C+Z+S">Zhiwei Steven Wu</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2302.08533v1-abstract-short" style="display: inline;"> Federated Learning (FL) aims to foster collaboration among a population of clients to improve the accuracy of machine learning without directly sharing local data. Although there has been rich literature on designing federated learning algorithms, most prior works implicitly assume that all clients are willing to participate in a FL scheme. In practice, clients may not benefit from joining in FL,… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2302.08533v1-abstract-full').style.display = 'inline'; document.getElementById('2302.08533v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2302.08533v1-abstract-full" style="display: none;"> Federated Learning (FL) aims to foster collaboration among a population of clients to improve the accuracy of machine learning without directly sharing local data. Although there has been rich literature on designing federated learning algorithms, most prior works implicitly assume that all clients are willing to participate in a FL scheme. In practice, clients may not benefit from joining in FL, especially in light of potential costs related to issues such as privacy and computation. In this work, we study the clients' incentives in federated learning to help the service provider design better solutions and ensure clients make better decisions. We are the first to model clients' behaviors in FL as a network effects game, where each client's benefit depends on other clients who also join the network. Using this setup we analyze the dynamics of clients' participation and characterize the equilibrium, where no client has incentives to alter their decision. Specifically, we show that dynamics in the population naturally converge to equilibrium without needing explicit interventions. Finally, we provide a cost-efficient payment scheme that incentivizes clients to reach a desired equilibrium when the initial network is empty. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2302.08533v1-abstract-full').style.display = 'none'; document.getElementById('2302.08533v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 16 February, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2023. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">14 pages of main text, 26 pages in total</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2302.06503">arXiv:2302.06503</a> <span> [<a href="https://arxiv.org/pdf/2302.06503">pdf</a>, <a href="https://arxiv.org/format/2302.06503">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computers and Society">cs.CY</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Human-Computer Interaction">cs.HC</span> </div> </div> <p class="title is-5 mathjax"> Ground(less) Truth: A Causal Framework for Proxy Labels in Human-Algorithm Decision-Making </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Guerdan%2C+L">Luke Guerdan</a>, <a href="/search/cs?searchtype=author&query=Coston%2C+A">Amanda Coston</a>, <a href="/search/cs?searchtype=author&query=Wu%2C+Z+S">Zhiwei Steven Wu</a>, <a href="/search/cs?searchtype=author&query=Holstein%2C+K">Kenneth Holstein</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2302.06503v4-abstract-short" style="display: inline;"> A growing literature on human-AI decision-making investigates strategies for combining human judgment with statistical models to improve decision-making. Research in this area often evaluates proposed improvements to models, interfaces, or workflows by demonstrating improved predictive performance on "ground truth" labels. However, this practice overlooks a key difference between human judgments a… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2302.06503v4-abstract-full').style.display = 'inline'; document.getElementById('2302.06503v4-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2302.06503v4-abstract-full" style="display: none;"> A growing literature on human-AI decision-making investigates strategies for combining human judgment with statistical models to improve decision-making. Research in this area often evaluates proposed improvements to models, interfaces, or workflows by demonstrating improved predictive performance on "ground truth" labels. However, this practice overlooks a key difference between human judgments and model predictions. Whereas humans reason about broader phenomena of interest in a decision -- including latent constructs that are not directly observable, such as disease status, the "toxicity" of online comments, or future "job performance" -- predictive models target proxy labels that are readily available in existing datasets. 
Predictive models' reliance on simplistic proxies makes them vulnerable to various sources of statistical bias. In this paper, we identify five sources of target variable bias that can impact the validity of proxy labels in human-AI decision-making tasks. We develop a causal framework to disentangle the relationship between each bias and clarify which are of concern in specific human-AI decision-making tasks. We demonstrate how our framework can be used to articulate implicit assumptions made in prior modeling work, and we recommend evaluation strategies for verifying whether these assumptions hold in practice. We then leverage our framework to re-examine the designs of prior human subjects experiments that investigate human-AI decision-making, finding that only a small fraction of studies examine factors related to target variable bias. We conclude by discussing opportunities to better address target variable bias in future research. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2302.06503v4-abstract-full').style.display = 'none'; document.getElementById('2302.06503v4-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 25 May, 2023; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 13 February, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2023. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">FAccT 23'</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2211.14236">arXiv:2211.14236</a> <span> [<a href="https://arxiv.org/pdf/2211.14236">pdf</a>, <a href="https://arxiv.org/format/2211.14236">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Econometrics">econ.EM</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computer Science and Game Theory">cs.GT</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> Strategyproof Decision-Making in Panel Data Settings and Beyond </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Harris%2C+K">Keegan Harris</a>, <a href="/search/cs?searchtype=author&query=Agarwal%2C+A">Anish Agarwal</a>, <a href="/search/cs?searchtype=author&query=Podimata%2C+C">Chara Podimata</a>, <a href="/search/cs?searchtype=author&query=Wu%2C+Z+S">Zhiwei Steven Wu</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2211.14236v4-abstract-short" style="display: inline;"> We consider the problem of decision-making using panel data, in which a decision-maker gets noisy, repeated measurements of multiple units (or agents). We consider a setup where there is a pre-intervention period, when the principal observes the outcomes of each unit, after which the principal uses these observations to assign a treatment to each unit. 
Unlike this classical setting, we permit the… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2211.14236v4-abstract-full').style.display = 'inline'; document.getElementById('2211.14236v4-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2211.14236v4-abstract-full" style="display: none;"> We consider the problem of decision-making using panel data, in which a decision-maker gets noisy, repeated measurements of multiple units (or agents). We consider a setup where there is a pre-intervention period, when the principal observes the outcomes of each unit, after which the principal uses these observations to assign a treatment to each unit. Unlike this classical setting, we permit the units generating the panel data to be strategic, i.e. units may modify their pre-intervention outcomes in order to receive a more desirable intervention. The principal's goal is to design a strategyproof intervention policy, i.e. a policy that assigns units to their utility-maximizing interventions despite their potential strategizing. We first identify a necessary and sufficient condition under which a strategyproof intervention policy exists, and provide a strategyproof mechanism with a simple closed form when one does exist. Along the way, we prove impossibility results for strategic multiclass classification, which may be of independent interest. When there are two interventions, we establish that there always exists a strategyproof mechanism, and provide an algorithm for learning such a mechanism. For three or more interventions, we provide an algorithm for learning a strategyproof mechanism if there exists a sufficiently large gap in the principal's rewards between different interventions. Finally, we empirically evaluate our model using real-world panel data collected from product sales over 18 months. We find that our methods compare favorably to baselines which do not take strategic interactions into consideration, even in the presence of model misspecification. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2211.14236v4-abstract-full').style.display = 'none'; document.getElementById('2211.14236v4-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 21 December, 2023; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 25 November, 2022; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> November 2022. 
</p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">In the fiftieth ACM SIGMETRICS International Conference on Measurement and Modeling of Computer Systems (SIGMETRICS 2024)</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2211.03994">arXiv:2211.03994</a> <span> [<a href="https://arxiv.org/pdf/2211.03994">pdf</a>, <a href="https://arxiv.org/format/2211.03994">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computers and Society">cs.CY</span> </div> </div> <p class="title is-5 mathjax"> Reinforcement Learning with Stepwise Fairness Constraints </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Deng%2C+Z">Zhun Deng</a>, <a href="/search/cs?searchtype=author&query=Sun%2C+H">He Sun</a>, <a href="/search/cs?searchtype=author&query=Wu%2C+Z+S">Zhiwei Steven Wu</a>, <a href="/search/cs?searchtype=author&query=Zhang%2C+L">Linjun Zhang</a>, <a href="/search/cs?searchtype=author&query=Parkes%2C+D+C">David C. Parkes</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2211.03994v1-abstract-short" style="display: inline;"> AI methods are used in societally important settings, ranging from credit to employment to housing, and it is crucial to provide fairness in regard to algorithmic decision making. Moreover, many settings are dynamic, with populations responding to sequential decision policies. We introduce the study of reinforcement learning (RL) with stepwise fairness constraints, requiring group fairness at each… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2211.03994v1-abstract-full').style.display = 'inline'; document.getElementById('2211.03994v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2211.03994v1-abstract-full" style="display: none;"> AI methods are used in societally important settings, ranging from credit to employment to housing, and it is crucial to provide fairness in regard to algorithmic decision making. Moreover, many settings are dynamic, with populations responding to sequential decision policies. We introduce the study of reinforcement learning (RL) with stepwise fairness constraints, requiring group fairness at each time step. Our focus is on tabular episodic RL, and we provide learning algorithms with strong theoretical guarantees in regard to policy optimality and fairness violation. Our framework provides useful tools to study the impact of fairness constraints in sequential settings and brings up new challenges in RL. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2211.03994v1-abstract-full').style.display = 'none'; document.getElementById('2211.03994v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 7 November, 2022; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> November 2022. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Fairness, Reinforcement Learning</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2211.03128">arXiv:2211.03128</a> <span> [<a href="https://arxiv.org/pdf/2211.03128">pdf</a>, <a href="https://arxiv.org/format/2211.03128">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computers and Society">cs.CY</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Cryptography and Security">cs.CR</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> <div class="is-inline-block" style="margin-left: 0.5rem"> <div class="tags has-addons"> <span class="tag is-dark is-size-7">doi</span> <span class="tag is-light is-size-7"><a class="" href="https://doi.org/10.1073/pnas.2218605120">10.1073/pnas.2218605120 <i class="fa fa-external-link" aria-hidden="true"></i></a></span> </div> </div> </div> <p class="title is-5 mathjax"> Confidence-Ranked Reconstruction of Census Microdata from Published Statistics </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Dick%2C+T">Travis Dick</a>, <a href="/search/cs?searchtype=author&query=Dwork%2C+C">Cynthia Dwork</a>, <a href="/search/cs?searchtype=author&query=Kearns%2C+M">Michael Kearns</a>, <a href="/search/cs?searchtype=author&query=Liu%2C+T">Terrance Liu</a>, <a href="/search/cs?searchtype=author&query=Roth%2C+A">Aaron Roth</a>, <a href="/search/cs?searchtype=author&query=Vietri%2C+G">Giuseppe Vietri</a>, <a href="/search/cs?searchtype=author&query=Wu%2C+Z+S">Zhiwei Steven Wu</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2211.03128v2-abstract-short" style="display: inline;"> A reconstruction attack on a private dataset $D$ takes as input some publicly accessible information about the dataset and produces a list of candidate elements of $D$. We introduce a new class of data reconstruction attacks based on randomized methods for non-convex optimization. We empirically demonstrate that our attacks can not only reconstruct full rows of $D$ from aggregate query statistics… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2211.03128v2-abstract-full').style.display = 'inline'; document.getElementById('2211.03128v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2211.03128v2-abstract-full" style="display: none;"> A reconstruction attack on a private dataset $D$ takes as input some publicly accessible information about the dataset and produces a list of candidate elements of $D$. 
We introduce a new class of data reconstruction attacks based on randomized methods for non-convex optimization. We empirically demonstrate that our attacks can not only reconstruct full rows of $D$ from aggregate query statistics $Q(D)\in \mathbb{R}^m$, but can do so in a way that reliably ranks reconstructed rows by their odds of appearing in the private data, providing a signature that could be used for prioritizing reconstructed rows for further actions such as identity theft or hate crime. We also design a sequence of baselines for evaluating reconstruction attacks. Our attacks significantly outperform those that are based only on access to a public distribution or population from which the private dataset $D$ was sampled, demonstrating that they are exploiting information in the aggregate statistics $Q(D)$, and not simply the overall structure of the distribution. In other words, the queries $Q(D)$ are permitting reconstruction of elements of this dataset, not the distribution from which $D$ was drawn. These findings are established both on 2010 U.S. decennial Census data and queries and Census-derived American Community Survey datasets. Taken together, our methods and experiments illustrate the risks in releasing numerically precise aggregate statistics of a large dataset, and provide further motivation for the careful application of provably private techniques such as differential privacy. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2211.03128v2-abstract-full').style.display = 'none'; document.getElementById('2211.03128v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 6 February, 2023; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 6 November, 2022; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> November 2022. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2209.14110">arXiv:2209.14110</a> <span> [<a href="https://arxiv.org/pdf/2209.14110">pdf</a>, <a href="https://arxiv.org/format/2209.14110">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Science and Game Theory">cs.GT</span> </div> </div> <p class="title is-5 mathjax"> Meta-Learning in Games </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Harris%2C+K">Keegan Harris</a>, <a href="/search/cs?searchtype=author&query=Anagnostides%2C+I">Ioannis Anagnostides</a>, <a href="/search/cs?searchtype=author&query=Farina%2C+G">Gabriele Farina</a>, <a href="/search/cs?searchtype=author&query=Khodak%2C+M">Mikhail Khodak</a>, <a href="/search/cs?searchtype=author&query=Wu%2C+Z+S">Zhiwei Steven Wu</a>, <a href="/search/cs?searchtype=author&query=Sandholm%2C+T">Tuomas Sandholm</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2209.14110v2-abstract-short" style="display: inline;"> In the literature on game-theoretic equilibrium finding, focus has mainly been on solving a single game in isolation. 
In practice, however, strategic interactions -- ranging from routing problems to online advertising auctions -- evolve dynamically, thereby leading to many similar games to be solved. To address this gap, we introduce meta-learning for equilibrium finding and learning to play games… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2209.14110v2-abstract-full').style.display = 'inline'; document.getElementById('2209.14110v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2209.14110v2-abstract-full" style="display: none;"> In the literature on game-theoretic equilibrium finding, focus has mainly been on solving a single game in isolation. In practice, however, strategic interactions -- ranging from routing problems to online advertising auctions -- evolve dynamically, thereby leading to many similar games to be solved. To address this gap, we introduce meta-learning for equilibrium finding and learning to play games. We establish the first meta-learning guarantees for a variety of fundamental and well-studied classes of games, including two-player zero-sum games, general-sum games, and Stackelberg games. In particular, we obtain rates of convergence to different game-theoretic equilibria that depend on natural notions of similarity between the sequence of games encountered, while at the same time recovering the known single-game guarantees when the sequence of games is arbitrary. Along the way, we prove a number of new results in the single-game regime through a simple and unified framework, which may be of independent interest. Finally, we evaluate our meta-learning algorithms on endgames faced by the poker agent Libratus against top human professionals. The experiments show that games with varying stack sizes can be solved significantly faster using our meta-learning techniques than by solving them separately, often by an order of magnitude. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2209.14110v2-abstract-full').style.display = 'none'; document.getElementById('2209.14110v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 1 March, 2023; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 28 September, 2022; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> September 2022. 
</p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">In the eleventh Conference on Learning Representations (ICLR 2023)</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2209.07400">arXiv:2209.07400</a> <span> [<a href="https://arxiv.org/pdf/2209.07400">pdf</a>, <a href="https://arxiv.org/format/2209.07400">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> Private Synthetic Data for Multitask Learning and Marginal Queries </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Vietri%2C+G">Giuseppe Vietri</a>, <a href="/search/cs?searchtype=author&query=Archambeau%2C+C">Cedric Archambeau</a>, <a href="/search/cs?searchtype=author&query=Aydore%2C+S">Sergul Aydore</a>, <a href="/search/cs?searchtype=author&query=Brown%2C+W">William Brown</a>, <a href="/search/cs?searchtype=author&query=Kearns%2C+M">Michael Kearns</a>, <a href="/search/cs?searchtype=author&query=Roth%2C+A">Aaron Roth</a>, <a href="/search/cs?searchtype=author&query=Siva%2C+A">Ankit Siva</a>, <a href="/search/cs?searchtype=author&query=Tang%2C+S">Shuai Tang</a>, <a href="/search/cs?searchtype=author&query=Wu%2C+Z+S">Zhiwei Steven Wu</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2209.07400v1-abstract-short" style="display: inline;"> We provide a differentially private algorithm for producing synthetic data simultaneously useful for multiple tasks: marginal queries and multitask machine learning (ML). A key innovation in our algorithm is the ability to directly handle numerical features, in contrast to a number of related prior approaches which require numerical features to be first converted into {high cardinality} categorica… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2209.07400v1-abstract-full').style.display = 'inline'; document.getElementById('2209.07400v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2209.07400v1-abstract-full" style="display: none;"> We provide a differentially private algorithm for producing synthetic data simultaneously useful for multiple tasks: marginal queries and multitask machine learning (ML). A key innovation in our algorithm is the ability to directly handle numerical features, in contrast to a number of related prior approaches which require numerical features to be first converted into {high cardinality} categorical features via {a binning strategy}. Higher binning granularity is required for better accuracy, but this negatively impacts scalability. Eliminating the need for binning allows us to produce synthetic data preserving large numbers of statistical queries such as marginals on numerical features, and class conditional linear threshold queries. Preserving the latter means that the fraction of points of each class label above a particular half-space is roughly the same in both the real and synthetic data. This is the property that is needed to train a linear classifier in a multitask setting. 
Our algorithm also allows us to produce high quality synthetic data for mixed marginal queries, that combine both categorical and numerical features. Our method consistently runs 2-5x faster than the best comparable techniques, and provides significant accuracy improvements in both marginal queries and linear prediction tasks for mixed-type datasets. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2209.07400v1-abstract-full').style.display = 'none'; document.getElementById('2209.07400v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 15 September, 2022; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> September 2022. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">The short version of this paper appears in the proceedings of NeurIPS-22</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2208.09551">arXiv:2208.09551</a> <span> [<a href="https://arxiv.org/pdf/2208.09551">pdf</a>, <a href="https://arxiv.org/ps/2208.09551">ps</a>, <a href="https://arxiv.org/format/2208.09551">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Science and Game Theory">cs.GT</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> Game-Theoretic Algorithms for Conditional Moment Matching </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Swamy%2C+G">Gokul Swamy</a>, <a href="/search/cs?searchtype=author&query=Choudhury%2C+S">Sanjiban Choudhury</a>, <a href="/search/cs?searchtype=author&query=Bagnell%2C+J+A">J. Andrew Bagnell</a>, <a href="/search/cs?searchtype=author&query=Wu%2C+Z+S">Zhiwei Steven Wu</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2208.09551v1-abstract-short" style="display: inline;"> A variety of problems in econometrics and machine learning, including instrumental variable regression and Bellman residual minimization, can be formulated as satisfying a set of conditional moment restrictions (CMR). We derive a general, game-theoretic strategy for satisfying CMR that scales to nonlinear problems, is amenable to gradient-based optimization, and is able to account for finite sampl… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2208.09551v1-abstract-full').style.display = 'inline'; document.getElementById('2208.09551v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2208.09551v1-abstract-full" style="display: none;"> A variety of problems in econometrics and machine learning, including instrumental variable regression and Bellman residual minimization, can be formulated as satisfying a set of conditional moment restrictions (CMR). We derive a general, game-theoretic strategy for satisfying CMR that scales to nonlinear problems, is amenable to gradient-based optimization, and is able to account for finite sample uncertainty. 
We recover the approaches of Dikkala et al. and Dai et al. as special cases of our general framework before detailing various extensions and how to efficiently solve the game defined by CMR. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2208.09551v1-abstract-full').style.display = 'none'; document.getElementById('2208.09551v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 19 August, 2022; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> August 2022. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2208.02225">arXiv:2208.02225</a> <span> [<a href="https://arxiv.org/pdf/2208.02225">pdf</a>, <a href="https://arxiv.org/format/2208.02225">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> Sequence Model Imitation Learning with Unobserved Contexts </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Swamy%2C+G">Gokul Swamy</a>, <a href="/search/cs?searchtype=author&query=Choudhury%2C+S">Sanjiban Choudhury</a>, <a href="/search/cs?searchtype=author&query=Bagnell%2C+J+A">J. Andrew Bagnell</a>, <a href="/search/cs?searchtype=author&query=Wu%2C+Z+S">Zhiwei Steven Wu</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2208.02225v3-abstract-short" style="display: inline;"> We consider imitation learning problems where the learner's ability to mimic the expert increases throughout the course of an episode as more information is revealed. One example of this is when the expert has access to privileged information: while the learner might not be able to accurately reproduce expert behavior early on in an episode, by considering the entire history of states and actions,… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2208.02225v3-abstract-full').style.display = 'inline'; document.getElementById('2208.02225v3-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2208.02225v3-abstract-full" style="display: none;"> We consider imitation learning problems where the learner's ability to mimic the expert increases throughout the course of an episode as more information is revealed. One example of this is when the expert has access to privileged information: while the learner might not be able to accurately reproduce expert behavior early on in an episode, by considering the entire history of states and actions, they might be able to eventually identify the hidden context and act as the expert would. We prove that on-policy imitation learning algorithms (with or without access to a queryable expert) are better equipped to handle these sorts of asymptotically realizable problems than off-policy methods. This is because on-policy algorithms provably learn to recover from their initially suboptimal actions, while off-policy methods treat their suboptimal past actions as though they came from the expert. This often manifests as a latching behavior: a naive repetition of past actions. 
We conduct experiments in a toy bandit domain that show that there exist sharp phase transitions of whether off-policy approaches are able to match expert performance asymptotically, in contrast to the uniformly good performance of on-policy approaches. We demonstrate that on several continuous control tasks, on-policy approaches are able to use history to identify the context while off-policy approaches actually perform worse when given access to history. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2208.02225v3-abstract-full').style.display = 'none'; document.getElementById('2208.02225v3-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 14 January, 2023; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 3 August, 2022; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> August 2022. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2206.07902">arXiv:2206.07902</a> <span> [<a href="https://arxiv.org/pdf/2206.07902">pdf</a>, <a href="https://arxiv.org/format/2206.07902">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Cryptography and Security">cs.CR</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">stat.ML</span> </div> </div> <p class="title is-5 mathjax"> On Privacy and Personalization in Cross-Silo Federated Learning </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Liu%2C+Z">Ziyu Liu</a>, <a href="/search/cs?searchtype=author&query=Hu%2C+S">Shengyuan Hu</a>, <a href="/search/cs?searchtype=author&query=Wu%2C+Z+S">Zhiwei Steven Wu</a>, <a href="/search/cs?searchtype=author&query=Smith%2C+V">Virginia Smith</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2206.07902v2-abstract-short" style="display: inline;"> While the application of differential privacy (DP) has been well-studied in cross-device federated learning (FL), there is a lack of work considering DP and its implications for cross-silo FL, a setting characterized by a limited number of clients each containing many data subjects. In cross-silo FL, usual notions of client-level DP are less suitable as real-world privacy regulations typically con… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2206.07902v2-abstract-full').style.display = 'inline'; document.getElementById('2206.07902v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2206.07902v2-abstract-full" style="display: none;"> While the application of differential privacy (DP) has been well-studied in cross-device federated learning (FL), there is a lack of work considering DP and its implications for cross-silo FL, a setting characterized by a limited number of clients each containing many data subjects. 
In cross-silo FL, usual notions of client-level DP are less suitable as real-world privacy regulations typically concern the in-silo data subjects rather than the silos themselves. In this work, we instead consider an alternative notion of silo-specific sample-level DP, where silos set their own privacy targets for their local examples. Under this setting, we reconsider the roles of personalization in federated learning. In particular, we show that mean-regularized multi-task learning (MR-MTL), a simple personalization framework, is a strong baseline for cross-silo FL: under stronger privacy requirements, silos are incentivized to federate more with each other to mitigate DP noise, resulting in consistent improvements relative to standard baseline methods. We provide an empirical study of competing methods as well as a theoretical characterization of MR-MTL for mean estimation, highlighting the interplay between privacy and cross-silo data heterogeneity. Our work serves to establish baselines for private cross-silo FL as well as identify key directions of future work in this area. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2206.07902v2-abstract-full').style.display = 'none'; document.getElementById('2206.07902v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 17 October, 2022; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 15 June, 2022; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> June 2022. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">NeurIPS 2022, 37 pages</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2206.07234">arXiv:2206.07234</a> <span> [<a href="https://arxiv.org/pdf/2206.07234">pdf</a>, <a href="https://arxiv.org/format/2206.07234">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Cryptography and Security">cs.CR</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Data Structures and Algorithms">cs.DS</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">stat.ML</span> </div> </div> <p class="title is-5 mathjax"> Brownian Noise Reduction: Maximizing Privacy Subject to Accuracy Constraints </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Whitehouse%2C+J">Justin Whitehouse</a>, <a href="/search/cs?searchtype=author&query=Wu%2C+Z+S">Zhiwei Steven Wu</a>, <a href="/search/cs?searchtype=author&query=Ramdas%2C+A">Aaditya Ramdas</a>, <a href="/search/cs?searchtype=author&query=Rogers%2C+R">Ryan Rogers</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2206.07234v4-abstract-short" style="display: inline;"> There is a disconnect between how researchers and practitioners handle privacy-utility tradeoffs. 
Researchers primarily operate from a privacy first perspective, setting strict privacy requirements and minimizing risk subject to these constraints. Practitioners often desire an accuracy first perspective, possibly satisfied with the greatest privacy they can get subject to obtaining sufficiently sm… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2206.07234v4-abstract-full').style.display = 'inline'; document.getElementById('2206.07234v4-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2206.07234v4-abstract-full" style="display: none;"> There is a disconnect between how researchers and practitioners handle privacy-utility tradeoffs. Researchers primarily operate from a privacy first perspective, setting strict privacy requirements and minimizing risk subject to these constraints. Practitioners often desire an accuracy first perspective, possibly satisfied with the greatest privacy they can get subject to obtaining sufficiently small error. Ligett et al. have introduced a "noise reduction" algorithm to address the latter perspective. The authors show that by adding correlated Laplace noise and progressively reducing it on demand, it is possible to produce a sequence of increasingly accurate estimates of a private parameter while only paying a privacy cost for the least noisy iterate released. In this work, we generalize noise reduction to the setting of Gaussian noise, introducing the Brownian mechanism. The Brownian mechanism works by first adding Gaussian noise of high variance corresponding to the final point of a simulated Brownian motion. Then, at the practitioner's discretion, noise is gradually decreased by tracing back along the Brownian path to an earlier time. Our mechanism is more naturally applicable to the common setting of bounded $\ell_2$-sensitivity, empirically outperforms existing work on common statistical tasks, and provides customizable control of privacy loss over the entire interaction with the practitioner. We complement our Brownian mechanism with ReducedAboveThreshold, a generalization of the classical AboveThreshold algorithm that provides adaptive privacy guarantees. Overall, our results demonstrate that one can meet utility constraints while still maintaining strong levels of privacy. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2206.07234v4-abstract-full').style.display = 'none'; document.getElementById('2206.07234v4-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 10 November, 2023; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 14 June, 2022; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> June 2022. 
</p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">26 pages, 4 figures</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2206.05942">arXiv:2206.05942</a> <span> [<a href="https://arxiv.org/pdf/2206.05942">pdf</a>, <a href="https://arxiv.org/format/2206.05942">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> Private Synthetic Data with Hierarchical Structure </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Liu%2C+T">Terrance Liu</a>, <a href="/search/cs?searchtype=author&query=Wu%2C+Z+S">Zhiwei Steven Wu</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2206.05942v1-abstract-short" style="display: inline;"> We study the problem of differentially private synthetic data generation for hierarchical datasets in which individual data points are grouped together (e.g., people within households). In particular, to measure the similarity between the synthetic dataset and the underlying private one, we frame our objective under the problem of private query release, generating a synthetic dataset that preserve… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2206.05942v1-abstract-full').style.display = 'inline'; document.getElementById('2206.05942v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2206.05942v1-abstract-full" style="display: none;"> We study the problem of differentially private synthetic data generation for hierarchical datasets in which individual data points are grouped together (e.g., people within households). In particular, to measure the similarity between the synthetic dataset and the underlying private one, we frame our objective under the problem of private query release, generating a synthetic dataset that preserves answers for some collection of queries (i.e., statistics like mean aggregate counts). However, while the application of private synthetic data to the problem of query release has been well studied, such research is restricted to non-hierarchical data domains, raising the initial question -- what queries are important when considering data of this form? Moreover, it has not yet been established how one can generate synthetic data at both the group and individual-level while capturing such statistics. In light of these challenges, we first formalize the problem of hierarchical query release, in which the goal is to release a collection of statistics for some hierarchical dataset. Specifically, we provide a general set of statistical queries that captures relationships between attributes at both the group and individual-level. Subsequently, we introduce private synthetic data algorithms for hierarchical query release and evaluate them on hierarchical datasets derived from the American Community Survey and Allegheny Family Screening Tool data. Finally, we look to the American Community Survey, whose inherent hierarchical structure gives rise to another set of domain-specific queries that we run experiments with. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2206.05942v1-abstract-full').style.display = 'none'; document.getElementById('2206.05942v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 13 June, 2022; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> June 2022. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2206.00494">arXiv:2206.00494</a> <span> [<a href="https://arxiv.org/pdf/2206.00494">pdf</a>, <a href="https://arxiv.org/ps/2206.00494">ps</a>, <a href="https://arxiv.org/format/2206.00494">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> Incentivizing Combinatorial Bandit Exploration </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Hu%2C+X">Xinyan Hu</a>, <a href="/search/cs?searchtype=author&query=Ngo%2C+D+D">Dung Daniel Ngo</a>, <a href="/search/cs?searchtype=author&query=Slivkins%2C+A">Aleksandrs Slivkins</a>, <a href="/search/cs?searchtype=author&query=Wu%2C+Z+S">Zhiwei Steven Wu</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2206.00494v1-abstract-short" style="display: inline;"> Consider a bandit algorithm that recommends actions to self-interested users in a recommendation system. The users are free to choose other actions and need to be incentivized to follow the algorithm's recommendations. While the users prefer to exploit, the algorithm can incentivize them to explore by leveraging the information collected from the previous users. All published work on this problem,… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2206.00494v1-abstract-full').style.display = 'inline'; document.getElementById('2206.00494v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2206.00494v1-abstract-full" style="display: none;"> Consider a bandit algorithm that recommends actions to self-interested users in a recommendation system. The users are free to choose other actions and need to be incentivized to follow the algorithm's recommendations. While the users prefer to exploit, the algorithm can incentivize them to explore by leveraging the information collected from the previous users. All published work on this problem, known as incentivized exploration, focuses on small, unstructured action sets and mainly targets the case when the users' beliefs are independent across actions. However, realistic exploration problems often feature large, structured action sets and highly correlated beliefs. We focus on a paradigmatic exploration problem with structure: combinatorial semi-bandits. We prove that Thompson Sampling, when applied to combinatorial semi-bandits, is incentive-compatible when initialized with a sufficient number of samples of each arm (where this number is determined in advance by the Bayesian prior). Moreover, we design incentive-compatible algorithms for collecting the initial samples. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2206.00494v1-abstract-full').style.display = 'none'; document.getElementById('2206.00494v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 1 June, 2022; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> June 2022. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">9 pages of main text, 21 pages in total</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2205.15397">arXiv:2205.15397</a> <span> [<a href="https://arxiv.org/pdf/2205.15397">pdf</a>, <a href="https://arxiv.org/format/2205.15397">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">stat.ML</span> </div> </div> <p class="title is-5 mathjax"> Minimax Optimal Online Imitation Learning via Replay Estimation </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Swamy%2C+G">Gokul Swamy</a>, <a href="/search/cs?searchtype=author&query=Rajaraman%2C+N">Nived Rajaraman</a>, <a href="/search/cs?searchtype=author&query=Peng%2C+M">Matthew Peng</a>, <a href="/search/cs?searchtype=author&query=Choudhury%2C+S">Sanjiban Choudhury</a>, <a href="/search/cs?searchtype=author&query=Bagnell%2C+J+A">J. Andrew Bagnell</a>, <a href="/search/cs?searchtype=author&query=Wu%2C+Z+S">Zhiwei Steven Wu</a>, <a href="/search/cs?searchtype=author&query=Jiao%2C+J">Jiantao Jiao</a>, <a href="/search/cs?searchtype=author&query=Ramchandran%2C+K">Kannan Ramchandran</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2205.15397v5-abstract-short" style="display: inline;"> Online imitation learning is the problem of how best to mimic expert demonstrations, given access to the environment or an accurate simulator. Prior work has shown that in the infinite sample regime, exact moment matching achieves value equivalence to the expert policy. However, in the finite sample regime, even if one has no optimization error, empirical variance can lead to a performance gap tha… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2205.15397v5-abstract-full').style.display = 'inline'; document.getElementById('2205.15397v5-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2205.15397v5-abstract-full" style="display: none;"> Online imitation learning is the problem of how best to mimic expert demonstrations, given access to the environment or an accurate simulator. Prior work has shown that in the infinite sample regime, exact moment matching achieves value equivalence to the expert policy. However, in the finite sample regime, even if one has no optimization error, empirical variance can lead to a performance gap that scales with $H^2 / N$ for behavioral cloning and $H / \sqrt{N}$ for online moment matching, where $H$ is the horizon and $N$ is the size of the expert dataset. 
We introduce the technique of replay estimation to reduce this empirical variance: by repeatedly executing cached expert actions in a stochastic simulator, we compute a smoother expert visitation distribution estimate to match. In the presence of general function approximation, we prove a meta theorem reducing the performance gap of our approach to the parameter estimation error for offline classification (i.e. learning the expert policy). In the tabular setting or with linear function approximation, our meta theorem shows that the performance gap incurred by our approach achieves the optimal $\widetilde{O} \left( \min\left({H^{3/2}} / {N}, {H} / {\sqrt{N}}\right) \right)$ dependency, under significantly weaker assumptions compared to prior work. We implement multiple instantiations of our approach on several continuous control tasks and find that we are able to significantly improve policy performance across a variety of dataset sizes. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2205.15397v5-abstract-full').style.display = 'none'; document.getElementById('2205.15397v5-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 14 January, 2023; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 30 May, 2022; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> May 2022. </p> </li> </ol> <nav class="pagination is-small is-centered breathe-horizontal" role="navigation" aria-label="pagination"> <a href="" class="pagination-previous is-invisible">Previous </a> <a href="/search/?searchtype=author&query=Wu%2C+Z+S&start=50" class="pagination-next" >Next </a> <ul class="pagination-list"> <li> <a href="/search/?searchtype=author&query=Wu%2C+Z+S&start=0" class="pagination-link is-current" aria-label="Goto page 1">1 </a> </li> <li> <a href="/search/?searchtype=author&query=Wu%2C+Z+S&start=50" class="pagination-link " aria-label="Page 2" aria-current="page">2 </a> </li> <li> <a href="/search/?searchtype=author&query=Wu%2C+Z+S&start=100" class="pagination-link " aria-label="Page 3" aria-current="page">3 </a> </li> </ul> </nav> </div> </main> </body> </html>
href="https://info.arxiv.org/help/contact.html"> Contact</a> </li> <li> <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 512 512" class="icon filter-black" role="presentation"><title>subscribe to arXiv mailings</title><desc>Click here to subscribe</desc><path d="M476 3.2L12.5 270.6c-18.1 10.4-15.8 35.6 2.2 43.2L121 358.4l287.3-253.2c5.5-4.9 13.3 2.6 8.6 8.3L176 407v80.5c0 23.6 28.5 32.9 42.5 15.8L282 426l124.6 52.2c14.2 6 30.4-2.9 33-18.2l72-432C515 7.8 493.3-6.8 476 3.2z"/></svg> <a href="https://info.arxiv.org/help/subscribe"> Subscribe</a> </li> </ul> </div> </div> </div> <!-- end MetaColumn 1 --> <!-- MetaColumn 2 --> <div class="column"> <div class="columns"> <div class="column"> <ul class="nav-spaced"> <li><a href="https://info.arxiv.org/help/license/index.html">Copyright</a></li> <li><a href="https://info.arxiv.org/help/policies/privacy_policy.html">Privacy Policy</a></li> </ul> </div> <div class="column sorry-app-links"> <ul class="nav-spaced"> <li><a href="https://info.arxiv.org/help/web_accessibility.html">Web Accessibility Assistance</a></li> <li> <p class="help"> <a class="a11y-main-link" href="https://status.arxiv.org" target="_blank">arXiv Operational Status <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 256 512" class="icon filter-dark_grey" role="presentation"><path d="M224.3 273l-136 136c-9.4 9.4-24.6 9.4-33.9 0l-22.6-22.6c-9.4-9.4-9.4-24.6 0-33.9l96.4-96.4-96.4-96.4c-9.4-9.4-9.4-24.6 0-33.9L54.3 103c9.4-9.4 24.6-9.4 33.9 0l136 136c9.5 9.4 9.5 24.6.1 34z"/></svg></a><br> Get status notifications via <a class="is-link" href="https://subscribe.sorryapp.com/24846f03/email/new" target="_blank"><svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 512 512" class="icon filter-black" role="presentation"><path d="M502.3 190.8c3.9-3.1 9.7-.2 9.7 4.7V400c0 26.5-21.5 48-48 48H48c-26.5 0-48-21.5-48-48V195.6c0-5 5.7-7.8 9.7-4.7 22.4 17.4 52.1 39.5 154.1 113.6 21.1 15.4 56.7 47.8 92.2 47.6 35.7.3 72-32.8 92.3-47.6 102-74.1 131.6-96.3 154-113.7zM256 320c23.2.4 56.6-29.2 73.4-41.4 132.7-96.3 142.8-104.7 173.4-128.7 5.8-4.5 9.2-11.5 9.2-18.9v-19c0-26.5-21.5-48-48-48H48C21.5 64 0 85.5 0 112v19c0 7.4 3.4 14.3 9.2 18.9 30.6 23.9 40.7 32.4 173.4 128.7 16.8 12.2 50.2 41.8 73.4 41.4z"/></svg>email</a> or <a class="is-link" href="https://subscribe.sorryapp.com/24846f03/slack/new" target="_blank"><svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 448 512" class="icon filter-black" role="presentation"><path d="M94.12 315.1c0 25.9-21.16 47.06-47.06 47.06S0 341 0 315.1c0-25.9 21.16-47.06 47.06-47.06h47.06v47.06zm23.72 0c0-25.9 21.16-47.06 47.06-47.06s47.06 21.16 47.06 47.06v117.84c0 25.9-21.16 47.06-47.06 47.06s-47.06-21.16-47.06-47.06V315.1zm47.06-188.98c-25.9 0-47.06-21.16-47.06-47.06S139 32 164.9 32s47.06 21.16 47.06 47.06v47.06H164.9zm0 23.72c25.9 0 47.06 21.16 47.06 47.06s-21.16 47.06-47.06 47.06H47.06C21.16 243.96 0 222.8 0 196.9s21.16-47.06 47.06-47.06H164.9zm188.98 47.06c0-25.9 21.16-47.06 47.06-47.06 25.9 0 47.06 21.16 47.06 47.06s-21.16 47.06-47.06 47.06h-47.06V196.9zm-23.72 0c0 25.9-21.16 47.06-47.06 47.06-25.9 0-47.06-21.16-47.06-47.06V79.06c0-25.9 21.16-47.06 47.06-47.06 25.9 0 47.06 21.16 47.06 47.06V196.9zM283.1 385.88c25.9 0 47.06 21.16 47.06 47.06 0 25.9-21.16 47.06-47.06 47.06-25.9 0-47.06-21.16-47.06-47.06v-47.06h47.06zm0-23.72c-25.9 0-47.06-21.16-47.06-47.06 0-25.9 21.16-47.06 47.06-47.06h117.84c25.9 0 47.06 21.16 47.06 47.06 0 25.9-21.16 47.06-47.06 47.06H283.1z"/></svg>slack</a> </p> </li> </ul> </div> </div> </div> <!-- end MetaColumn 2 --> </div> </footer> <script 
src="https://static.arxiv.org/static/base/1.0.0a5/js/member_acknowledgement.js"></script> </body> </html>