Search | arXiv e-print repository
<a href="https://info.arxiv.org/about/donate.html">Donate</a></span></p></div> </div> <!-- contains arXiv identity and search bar --> <div class="identity level is-marginless"> <div class="level-left"> <div class="level-item"> <a class="arxiv" href="https://arxiv.org/" aria-label="arxiv-logo"> <img src="https://static.arxiv.org/static/base/1.0.0a5/images/arxiv-logo-one-color-white.svg" aria-label="logo" alt="arxiv logo" width="85" style="width:85px;"/> </a> </div> </div> <div class="search-block level-right"> <form class="level-item mini-search" method="GET" action="https://arxiv.org/search"> <div class="field has-addons"> <div class="control"> <input class="input is-small" type="text" name="query" placeholder="Search..." aria-label="Search term or terms" /> <p class="help"><a href="https://info.arxiv.org/help">Help</a> | <a href="https://arxiv.org/search/advanced">Advanced Search</a></p> </div> <div class="control"> <div class="select is-small"> <select name="searchtype" aria-label="Field to search"> <option value="all" selected="selected">All fields</option> <option value="title">Title</option> <option value="author">Author</option> <option value="abstract">Abstract</option> <option value="comments">Comments</option> <option value="journal_ref">Journal reference</option> <option value="acm_class">ACM classification</option> <option value="msc_class">MSC classification</option> <option value="report_num">Report number</option> <option value="paper_id">arXiv identifier</option> <option value="doi">DOI</option> <option value="orcid">ORCID</option> <option value="author_id">arXiv author ID</option> <option value="help">Help pages</option> <option value="full_text">Full text</option> </select> </div> </div> <input type="hidden" name="source" value="header"> <button class="button is-small is-cul-darker">Search</button> </div> </form> </div> </div> <!-- closes identity --> <div class="container"> <div class="user-tools is-size-7 has-text-right has-text-weight-bold" role="navigation" aria-label="User menu"> <a href="https://arxiv.org/login">Login</a> </div> </div> </header> <main class="container" id="main-container"> <div class="level is-marginless"> <div class="level-left"> <h1 class="title is-clearfix"> Showing 1–50 of 52 results for author: <span class="mathjax">Balle, B</span> </h1> </div> <div class="level-right is-hidden-mobile"> <!-- feedback for mobile is moved to footer --> <span class="help" style="display: inline-block;"><a href="https://github.com/arXiv/arxiv-search/releases">Search v0.5.6 released 2020-02-24</a> </span> </div> </div> <div class="content"> <form method="GET" action="/search/cs" aria-role="search"> Searching in archive <strong>cs</strong>. <a href="/search/?searchtype=author&query=Balle%2C+B">Search in all archives.</a> <div class="field has-addons-tablet"> <div class="control is-expanded"> <label for="query" class="hidden-label">Search term or terms</label> <input class="input is-medium" id="query" name="query" placeholder="Search term..." 
type="text" value="Balle, B"> </div> <div class="select control is-medium"> <label class="is-hidden" for="searchtype">Field</label> <select class="is-medium" id="searchtype" name="searchtype"><option value="all">All fields</option><option value="title">Title</option><option selected value="author">Author(s)</option><option value="abstract">Abstract</option><option value="comments">Comments</option><option value="journal_ref">Journal reference</option><option value="acm_class">ACM classification</option><option value="msc_class">MSC classification</option><option value="report_num">Report number</option><option value="paper_id">arXiv identifier</option><option value="doi">DOI</option><option value="orcid">ORCID</option><option value="license">License (URI)</option><option value="author_id">arXiv author ID</option><option value="help">Help pages</option><option value="full_text">Full text</option></select> </div> <div class="control"> <button class="button is-link is-medium">Search</button> </div> </div> <div class="field"> <div class="control is-size-7"> <label class="radio"> <input checked id="abstracts-0" name="abstracts" type="radio" value="show"> Show abstracts </label> <label class="radio"> <input id="abstracts-1" name="abstracts" type="radio" value="hide"> Hide abstracts </label> </div> </div> <div class="is-clearfix" style="height: 2.5em"> <div class="is-pulled-right"> <a href="/search/advanced?terms-0-term=Balle%2C+B&terms-0-field=author&size=50&order=-announced_date_first">Advanced Search</a> </div> </div> <input type="hidden" name="order" value="-announced_date_first"> <input type="hidden" name="size" value="50"> </form> <div class="level breathe-horizontal"> <div class="level-left"> <form method="GET" action="/search/"> <div style="display: none;"> <select id="searchtype" name="searchtype"><option value="all">All fields</option><option value="title">Title</option><option selected value="author">Author(s)</option><option value="abstract">Abstract</option><option value="comments">Comments</option><option value="journal_ref">Journal reference</option><option value="acm_class">ACM classification</option><option value="msc_class">MSC classification</option><option value="report_num">Report number</option><option value="paper_id">arXiv identifier</option><option value="doi">DOI</option><option value="orcid">ORCID</option><option value="license">License (URI)</option><option value="author_id">arXiv author ID</option><option value="help">Help pages</option><option value="full_text">Full text</option></select> <input id="query" name="query" type="text" value="Balle, B"> <ul id="abstracts"><li><input checked id="abstracts-0" name="abstracts" type="radio" value="show"> <label for="abstracts-0">Show abstracts</label></li><li><input id="abstracts-1" name="abstracts" type="radio" value="hide"> <label for="abstracts-1">Hide abstracts</label></li></ul> </div> <div class="box field is-grouped is-grouped-multiline level-item"> <div class="control"> <span class="select is-small"> <select id="size" name="size"><option value="25">25</option><option selected value="50">50</option><option value="100">100</option><option value="200">200</option></select> </span> <label for="size">results per page</label>. 
</div> <div class="control"> <label for="order">Sort results by</label> <span class="select is-small"> <select id="order" name="order"><option selected value="-announced_date_first">Announcement date (newest first)</option><option value="announced_date_first">Announcement date (oldest first)</option><option value="-submitted_date">Submission date (newest first)</option><option value="submitted_date">Submission date (oldest first)</option><option value="">Relevance</option></select> </span> </div> <div class="control"> <button class="button is-small is-link">Go</button> </div> </div> </form> </div> </div> <nav class="pagination is-small is-centered breathe-horizontal" role="navigation" aria-label="pagination"> <a href="" class="pagination-previous is-invisible">Previous </a> <a href="/search/?searchtype=author&query=Balle%2C+B&start=50" class="pagination-next" >Next </a> <ul class="pagination-list"> <li> <a href="/search/?searchtype=author&query=Balle%2C+B&start=0" class="pagination-link is-current" aria-label="Goto page 1">1 </a> </li> <li> <a href="/search/?searchtype=author&query=Balle%2C+B&start=50" class="pagination-link " aria-label="Page 2" aria-current="page">2 </a> </li> </ul> </nav> <ol class="breathe-horizontal" start="1"> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2501.18914">arXiv:2501.18914</a> <span> [<a href="https://arxiv.org/pdf/2501.18914">pdf</a>, <a href="https://arxiv.org/format/2501.18914">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Cryptography and Security">cs.CR</span> </div> </div> <p class="title is-5 mathjax"> Scaling Laws for Differentially Private Language Models </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=McKenna%2C+R">Ryan McKenna</a>, <a href="/search/cs?searchtype=author&query=Huang%2C+Y">Yangsibo Huang</a>, <a href="/search/cs?searchtype=author&query=Sinha%2C+A">Amer Sinha</a>, <a href="/search/cs?searchtype=author&query=Balle%2C+B">Borja Balle</a>, <a href="/search/cs?searchtype=author&query=Charles%2C+Z">Zachary Charles</a>, <a href="/search/cs?searchtype=author&query=Choquette-Choo%2C+C+A">Christopher A. Choquette-Choo</a>, <a href="/search/cs?searchtype=author&query=Ghazi%2C+B">Badih Ghazi</a>, <a href="/search/cs?searchtype=author&query=Kaissis%2C+G">George Kaissis</a>, <a href="/search/cs?searchtype=author&query=Kumar%2C+R">Ravi Kumar</a>, <a href="/search/cs?searchtype=author&query=Liu%2C+R">Ruibo Liu</a>, <a href="/search/cs?searchtype=author&query=Yu%2C+D">Da Yu</a>, <a href="/search/cs?searchtype=author&query=Zhang%2C+C">Chiyuan Zhang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2501.18914v1-abstract-short" style="display: inline;"> Scaling laws have emerged as important components of large language model (LLM) training as they can predict performance gains through scale, and provide guidance on important hyper-parameter choices that would otherwise be expensive. LLMs also rely on large, high-quality training datasets, like those sourced from (sometimes sensitive) user data. 
   Training models on this sensitive user data requires careful privacy protections like differential privacy (DP). However, the dynamics of DP training are significantly different, and consequently their scaling laws are not yet fully understood. In this work, we establish scaling laws that accurately model the intricacies of DP LLM training, providing a complete picture of the compute-privacy-utility tradeoffs and the optimal training configurations in many settings.
   Submitted 31 January, 2025; originally announced January 2025.
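   The fitted laws themselves are in the paper; as a rough illustration of what establishing a scaling law involves, the sketch below fits a generic Chinchilla-style power law $L(N, D) = E + A/N^a + B/D^b$ to made-up (model size, token count, loss) measurements. The functional form, constants, and data here are illustrative assumptions, not the paper's; the DP-specific laws additionally depend on quantities such as the privacy budget and batch size.

```python
# Illustrative only: fit a Chinchilla-style scaling law L(N, D) = E + A/N^a + B/D^b
# to synthetic measurements. This shows the generic fitting recipe, not the
# paper's DP-specific laws or data.
import numpy as np
from scipy.optimize import curve_fit

def scaling_law(ND, E, A, a, B, b):
    N, D = ND
    return E + A / N**a + B / D**b

# Hypothetical (parameter count N, token count D, eval loss) triples.
N = np.array([1e7, 1e8, 1e9, 1e7, 1e8, 1e9])
D = np.array([1e9, 1e9, 1e9, 1e10, 1e10, 1e10])
loss = np.array([3.9, 3.2, 2.8, 3.5, 2.9, 2.5])

popt, _ = curve_fit(scaling_law, (N, D), loss,
                    p0=[2.0, 1e2, 0.3, 1e2, 0.3], maxfev=20000)
E, A, a, B, b = popt
print(f"L(N, D) ~ {E:.2f} + {A:.1f}/N^{a:.2f} + {B:.1f}/D^{b:.2f}")
```

   Once fitted, such a law is queried to pick the loss-minimizing (N, D) split for a fixed budget, which is the "guidance on hyper-parameter choices" the abstract refers to.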
2. arXiv:2501.08970 [pdf, other] (cs.CR, cs.AI, cs.LG)
   Trusted Machine Learning Models Unlock Private Inference for Problems Currently Infeasible with Cryptography
   Authors: Ilia Shumailov, Daniel Ramage, Sarah Meiklejohn, Peter Kairouz, Florian Hartmann, Borja Balle, Eugene Bagdasarian
   Abstract: We often interact with untrusted parties. Prioritization of privacy can limit the effectiveness of these interactions, as achieving certain goals necessitates sharing private data. Traditionally, addressing this challenge has involved either seeking trusted intermediaries or constructing cryptographic protocols that restrict how much data is revealed, such as multi-party computations or zero-knowledge proofs. While significant advances have been made in scaling cryptographic approaches, they remain limited in terms of the size and complexity of applications they can be used for. In this paper, we argue that capable machine learning models can fulfill the role of a trusted third party, thus enabling secure computations for applications that were previously infeasible. In particular, we describe Trusted Capable Model Environments (TCMEs) as an alternative approach for scaling secure computation, where capable machine learning model(s) interact under input/output constraints, with explicit information flow control and explicit statelessness. This approach aims to achieve a balance between privacy and computational efficiency, enabling private inference where classical cryptographic solutions are currently infeasible. We describe a number of use cases that are enabled by TCME, and show that even some simple classic cryptographic problems can already be solved with TCME. Finally, we outline current limitations and discuss the path forward in implementing them.
   Submitted 15 January, 2025; originally announced January 2025.
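   One classic problem of the kind the abstract alludes to is Yao's millionaires' problem: two parties learn who is richer and nothing else. The toy sketch below only illustrates the TCME input/output contract, with a plain function standing in for the trusted, stateless model; it provides no cryptographic security, and every name in it is hypothetical.

```python
# Toy illustration of the TCME input/output contract (not a secure protocol).
# A trusted, stateless "model" receives both private inputs and is permitted
# to emit only a single bit: which party's value is larger.

def trusted_comparison(alice_wealth: int, bob_wealth: int) -> str:
    # Statelessness: nothing is logged or retained between calls.
    # Output constraint: only the comparison result leaves the environment.
    return "alice" if alice_wealth > bob_wealth else "bob"

print(trusted_comparison(1_000_000, 750_000))  # -> "alice", and nothing more
```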
</p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2411.13598">arXiv:2411.13598</a> <span> [<a href="https://arxiv.org/pdf/2411.13598">pdf</a>, <a href="https://arxiv.org/format/2411.13598">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Cryptography and Security">cs.CR</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> Preserving Expert-Level Privacy in Offline Reinforcement Learning </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Sharma%2C+N">Navodita Sharma</a>, <a href="/search/cs?searchtype=author&query=Vinod%2C+V">Vishnu Vinod</a>, <a href="/search/cs?searchtype=author&query=Thakurta%2C+A">Abhradeep Thakurta</a>, <a href="/search/cs?searchtype=author&query=Agarwal%2C+A">Alekh Agarwal</a>, <a href="/search/cs?searchtype=author&query=Balle%2C+B">Borja Balle</a>, <a href="/search/cs?searchtype=author&query=Dann%2C+C">Christoph Dann</a>, <a href="/search/cs?searchtype=author&query=Raghuveer%2C+A">Aravindan Raghuveer</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2411.13598v1-abstract-short" style="display: inline;"> The offline reinforcement learning (RL) problem aims to learn an optimal policy from historical data collected by one or more behavioural policies (experts) by interacting with an environment. However, the individual experts may be privacy-sensitive in that the learnt policy may retain information about their precise choices. In some domains like personalized retrieval, advertising and healthcare,… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.13598v1-abstract-full').style.display = 'inline'; document.getElementById('2411.13598v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2411.13598v1-abstract-full" style="display: none;"> The offline reinforcement learning (RL) problem aims to learn an optimal policy from historical data collected by one or more behavioural policies (experts) by interacting with an environment. However, the individual experts may be privacy-sensitive in that the learnt policy may retain information about their precise choices. In some domains like personalized retrieval, advertising and healthcare, the expert choices are considered sensitive data. To provably protect the privacy of such experts, we propose a novel consensus-based expert-level differentially private offline RL training approach compatible with any existing offline RL algorithm. We prove rigorous differential privacy guarantees, while maintaining strong empirical performance. Unlike existing work in differentially private RL, we supplement the theory with proof-of-concept experiments on classic RL environments featuring large continuous state spaces, demonstrating substantial improvements over a natural baseline across multiple tasks. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.13598v1-abstract-full').style.display = 'none'; document.getElementById('2411.13598v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 18 November, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> November 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2411.10614">arXiv:2411.10614</a> <span> [<a href="https://arxiv.org/pdf/2411.10614">pdf</a>, <a href="https://arxiv.org/format/2411.10614">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Cryptography and Security">cs.CR</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> To Shuffle or not to Shuffle: Auditing DP-SGD with Shuffling </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Annamalai%2C+M+S+M+S">Meenatchi Sundaram Muthu Selva Annamalai</a>, <a href="/search/cs?searchtype=author&query=Balle%2C+B">Borja Balle</a>, <a href="/search/cs?searchtype=author&query=De+Cristofaro%2C+E">Emiliano De Cristofaro</a>, <a href="/search/cs?searchtype=author&query=Hayes%2C+J">Jamie Hayes</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2411.10614v1-abstract-short" style="display: inline;"> Differentially Private Stochastic Gradient Descent (DP-SGD) is a popular method for training machine learning models with formal Differential Privacy (DP) guarantees. As DP-SGD processes the training data in batches, it uses Poisson sub-sampling to select batches at each step. However, due to computational and compatibility benefits, replacing sub-sampling with shuffling has become common practice… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.10614v1-abstract-full').style.display = 'inline'; document.getElementById('2411.10614v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2411.10614v1-abstract-full" style="display: none;"> Differentially Private Stochastic Gradient Descent (DP-SGD) is a popular method for training machine learning models with formal Differential Privacy (DP) guarantees. As DP-SGD processes the training data in batches, it uses Poisson sub-sampling to select batches at each step. However, due to computational and compatibility benefits, replacing sub-sampling with shuffling has become common practice. Yet, since tight theoretical guarantees for shuffling are currently unknown, prior work using shuffling reports DP guarantees as though Poisson sub-sampling was used. This prompts the need to verify whether this discrepancy is reflected in a gap between the theoretical guarantees from state-of-the-art models and the actual privacy leakage. To do so, we introduce a novel DP auditing procedure to analyze DP-SGD with shuffling. We show that state-of-the-art DP models trained with shuffling appreciably overestimated privacy guarantees (up to 4x). 
   In the process, we assess the impact of several parameters, such as batch size, privacy budget, and threat model, on privacy leakage. Finally, we study two variations of the shuffling procedure found in the wild, which result in further privacy leakage. Overall, our work empirically attests to the risk of using shuffling instead of Poisson sub-sampling vis-à-vis the actual privacy leakage of DP-SGD.
   Submitted 15 November, 2024; originally announced November 2024.
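   The distinction being audited is easy to state in code: Poisson sub-sampling includes each example in each batch independently with some rate $q$, so batch sizes vary, while shuffling permutes the data once per epoch and cuts it into fixed-size batches. A minimal sketch of the two samplers (illustrative, not the authors' auditing code):

```python
import numpy as np

rng = np.random.default_rng(0)
n, batch_size = 10, 2
q = batch_size / n  # sampling rate matched to the fixed batch size

def poisson_batches(steps):
    # Poisson sub-sampling: each example joins each batch independently with
    # probability q, so batch sizes vary. This is what standard DP-SGD
    # privacy accounting assumes.
    return [np.nonzero(rng.random(n) < q)[0] for _ in range(steps)]

def shuffled_batches():
    # Shuffling: permute once per epoch, then cut into fixed-size batches, so
    # every example appears exactly once per epoch. This is the common
    # practice whose guarantees the paper audits.
    perm = rng.permutation(n)
    return [perm[i:i + batch_size] for i in range(0, n, batch_size)]

print(poisson_batches(3))   # variable-size batches
print(shuffled_batches())   # five disjoint batches of size 2
```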
5. arXiv:2410.06186 [pdf, other] (cs.CR, cs.LG)
   The Last Iterate Advantage: Empirical Auditing and Principled Heuristic Analysis of Differentially Private SGD
   Authors: Thomas Steinke, Milad Nasr, Arun Ganesh, Borja Balle, Christopher A. Choquette-Choo, Matthew Jagielski, Jamie Hayes, Abhradeep Guha Thakurta, Adam Smith, Andreas Terzis
   Abstract: We propose a simple heuristic privacy analysis of noisy clipped stochastic gradient descent (DP-SGD) in the setting where only the last iterate is released and the intermediate iterates remain hidden. Namely, our heuristic assumes a linear structure for the model. We show experimentally that our heuristic is predictive of the outcome of privacy auditing applied to various training procedures. Thus it can be used prior to training as a rough estimate of the final privacy leakage. We also probe the limitations of our heuristic by providing some artificial counterexamples where it underestimates the privacy leakage. The standard composition-based privacy analysis of DP-SGD effectively assumes that the adversary has access to all intermediate iterates, which is often unrealistic. However, this analysis remains the state of the art in practice. While our heuristic does not replace a rigorous privacy analysis, it illustrates the large gap between the best theoretical upper bounds and the privacy auditing lower bounds and sets a target for further work to improve the theoretical privacy analyses. We also empirically support our heuristic and show existing privacy auditing attacks are bounded by our heuristic analysis in both vision and language tasks.
   Submitted 10 October, 2024; v1 submitted 8 October, 2024; originally announced October 2024.
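   As a heavily simplified illustration of the hidden-iterates setting (not the paper's heuristic or its auditing procedure), the sketch below runs noisy clipped SGD on a one-dimensional problem, releases only the final iterate, and compares its distribution with and without a single outlier "canary" example; all constants are arbitrary.

```python
# Toy probe of the last-iterate release setting: how separable are the final
# iterates of noisy clipped SGD on two neighbouring datasets?
import numpy as np

rng = np.random.default_rng(0)

def final_iterate(data, steps=200, lr=0.1, clip=1.0, sigma=0.5):
    # Noisy clipped SGD on a 1-D mean-estimation loss; only the final
    # iterate is returned, mimicking hidden intermediate states.
    w = 0.0
    for _ in range(steps):
        g = np.clip(w - data, -clip, clip).mean()                 # clipped avg gradient
        w -= lr * (g + sigma * clip * rng.normal() / len(data))   # Gaussian noise
    return w

base = np.zeros(100)                    # dataset without the canary
canary = np.append(np.zeros(99), 10.0)  # neighbouring dataset with one outlier

last_base = np.array([final_iterate(base) for _ in range(300)])
last_canary = np.array([final_iterate(canary) for _ in range(300)])

# If the two release distributions separate clearly relative to their spread,
# the last iterate leaks the canary's presence.
print(last_base.mean(), last_canary.mean(), last_base.std())
```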
</p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2410.03883">arXiv:2410.03883</a> <span> [<a href="https://arxiv.org/pdf/2410.03883">pdf</a>, <a href="https://arxiv.org/format/2410.03883">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Cryptography and Security">cs.CR</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">stat.ML</span> </div> </div> <p class="title is-5 mathjax"> DiSK: Differentially Private Optimizer with Simplified Kalman Filter for Noise Reduction </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Zhang%2C+X">Xinwei Zhang</a>, <a href="/search/cs?searchtype=author&query=Bu%2C+Z">Zhiqi Bu</a>, <a href="/search/cs?searchtype=author&query=Balle%2C+B">Borja Balle</a>, <a href="/search/cs?searchtype=author&query=Hong%2C+M">Mingyi Hong</a>, <a href="/search/cs?searchtype=author&query=Razaviyayn%2C+M">Meisam Razaviyayn</a>, <a href="/search/cs?searchtype=author&query=Mirrokni%2C+V">Vahab Mirrokni</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2410.03883v1-abstract-short" style="display: inline;"> Differential privacy (DP) offers a robust framework for safeguarding individual data privacy. To utilize DP in training modern machine learning models, differentially private optimizers have been widely used in recent years. A popular approach to privatize an optimizer is to clip the individual gradients and add sufficiently large noise to the clipped gradient. This approach led to the development… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.03883v1-abstract-full').style.display = 'inline'; document.getElementById('2410.03883v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2410.03883v1-abstract-full" style="display: none;"> Differential privacy (DP) offers a robust framework for safeguarding individual data privacy. To utilize DP in training modern machine learning models, differentially private optimizers have been widely used in recent years. A popular approach to privatize an optimizer is to clip the individual gradients and add sufficiently large noise to the clipped gradient. This approach led to the development of DP optimizers that have comparable performance with their non-private counterparts in fine-tuning tasks or in tasks with a small number of training parameters. However, a significant performance drop is observed when these optimizers are applied to large-scale training. This degradation stems from the substantial noise injection required to maintain DP, which disrupts the optimizer's dynamics. This paper introduces DiSK, a novel framework designed to significantly enhance the performance of DP optimizers. DiSK employs Kalman filtering, a technique drawn from control and signal processing, to effectively denoise privatized gradients and generate progressively refined gradient estimations. To ensure practicality for large-scale training, we simplify the Kalman filtering process, minimizing its memory and computational demands. 
   We establish theoretical privacy-utility trade-off guarantees for DiSK, and demonstrate provable improvements over standard DP optimizers like DPSGD in terms of iteration complexity upper-bound. Extensive experiments across diverse tasks, including vision tasks such as CIFAR-100 and ImageNet-1k and language fine-tuning tasks such as GLUE, E2E, and DART, validate the effectiveness of DiSK. The results showcase its ability to significantly improve the performance of DP optimizers, surpassing state-of-the-art results under the same privacy constraints on several benchmarks.
   Submitted 4 October, 2024; originally announced October 2024.
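   The clip-then-add-noise privatization step described in the abstract, followed by a bare constant-gain smoother in the spirit of (but much cruder than) DiSK's simplified Kalman filter, can be sketched as follows; the clip norm, noise multiplier, and gain are illustrative.

```python
import numpy as np

rng = np.random.default_rng(0)

def privatize(per_example_grads, clip=1.0, sigma=1.0):
    # Standard DP-SGD privatization: rescale each per-example gradient to
    # norm <= clip, sum, add Gaussian noise scaled to the clip norm, average.
    norms = np.linalg.norm(per_example_grads, axis=1, keepdims=True)
    clipped = per_example_grads * np.minimum(1.0, clip / np.maximum(norms, 1e-12))
    noise = sigma * clip * rng.normal(size=per_example_grads.shape[1])
    return (clipped.sum(axis=0) + noise) / len(per_example_grads)

def smooth(noisy_grads, gain=0.3):
    # Constant-gain smoother: blend the running estimate with each new noisy
    # observation. DiSK's simplified Kalman filter is a refined version of
    # this idea with a principled, rather than fixed, gain.
    est, out = np.zeros_like(noisy_grads[0]), []
    for g in noisy_grads:
        est = (1.0 - gain) * est + gain * g
        out.append(est)
    return out

steps = [privatize(rng.normal(size=(32, 4))) for _ in range(5)]
print(smooth(steps)[-1])  # denoised gradient estimate after 5 steps
```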
7. arXiv:2409.13903 [pdf, other] (cs.AI)
   CI-Bench: Benchmarking Contextual Integrity of AI Assistants on Synthetic Data
   Authors: Zhao Cheng, Diane Wan, Matthew Abueg, Sahra Ghalebikesabi, Ren Yi, Eugene Bagdasarian, Borja Balle, Stefan Mellem, Shawn O'Banion
   Abstract: Advances in generative AI point towards a new era of personalized applications that perform diverse tasks on behalf of users. While general AI assistants have yet to fully emerge, their potential to share personal data raises significant privacy challenges. This paper introduces CI-Bench, a comprehensive synthetic benchmark for evaluating the ability of AI assistants to protect personal information during model inference. Leveraging the Contextual Integrity framework, our benchmark enables systematic assessment of information flow across important context dimensions, including roles, information types, and transmission principles. Unlike previous work with smaller, narrowly focused evaluations, we present a novel, scalable, multi-step data pipeline that synthetically generates natural communications, including dialogues and emails, which we use to generate 44 thousand test samples across eight domains. Additionally, we formulate and evaluate a naive AI assistant to demonstrate the need for further study and careful training towards personal assistant tasks. We envision CI-Bench as a valuable tool for guiding future language model development, deployment, system design, and dataset construction, ultimately contributing to the development of AI assistants that align with users' privacy expectations.
   Submitted 20 September, 2024; originally announced September 2024.
</p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2408.02373">arXiv:2408.02373</a> <span> [<a href="https://arxiv.org/pdf/2408.02373">pdf</a>, <a href="https://arxiv.org/format/2408.02373">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> </div> </div> <p class="title is-5 mathjax"> Operationalizing Contextual Integrity in Privacy-Conscious Assistants </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Ghalebikesabi%2C+S">Sahra Ghalebikesabi</a>, <a href="/search/cs?searchtype=author&query=Bagdasaryan%2C+E">Eugene Bagdasaryan</a>, <a href="/search/cs?searchtype=author&query=Yi%2C+R">Ren Yi</a>, <a href="/search/cs?searchtype=author&query=Yona%2C+I">Itay Yona</a>, <a href="/search/cs?searchtype=author&query=Shumailov%2C+I">Ilia Shumailov</a>, <a href="/search/cs?searchtype=author&query=Pappu%2C+A">Aneesh Pappu</a>, <a href="/search/cs?searchtype=author&query=Shi%2C+C">Chongyang Shi</a>, <a href="/search/cs?searchtype=author&query=Weidinger%2C+L">Laura Weidinger</a>, <a href="/search/cs?searchtype=author&query=Stanforth%2C+R">Robert Stanforth</a>, <a href="/search/cs?searchtype=author&query=Berrada%2C+L">Leonard Berrada</a>, <a href="/search/cs?searchtype=author&query=Kohli%2C+P">Pushmeet Kohli</a>, <a href="/search/cs?searchtype=author&query=Huang%2C+P">Po-Sen Huang</a>, <a href="/search/cs?searchtype=author&query=Balle%2C+B">Borja Balle</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2408.02373v2-abstract-short" style="display: inline;"> Advanced AI assistants combine frontier LLMs and tool access to autonomously perform complex tasks on behalf of users. While the helpfulness of such assistants can increase dramatically with access to user information including emails and documents, this raises privacy concerns about assistants sharing inappropriate information with third parties without user supervision. To steer information-shar… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2408.02373v2-abstract-full').style.display = 'inline'; document.getElementById('2408.02373v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2408.02373v2-abstract-full" style="display: none;"> Advanced AI assistants combine frontier LLMs and tool access to autonomously perform complex tasks on behalf of users. While the helpfulness of such assistants can increase dramatically with access to user information including emails and documents, this raises privacy concerns about assistants sharing inappropriate information with third parties without user supervision. To steer information-sharing assistants to behave in accordance with privacy expectations, we propose to operationalize contextual integrity (CI), a framework that equates privacy with the appropriate flow of information in a given context. In particular, we design and evaluate a number of strategies to steer assistants' information-sharing actions to be CI compliant. 
   Our evaluation is based on a novel form filling benchmark composed of human annotations of common webform applications, and it reveals that prompting frontier LLMs to perform CI-based reasoning yields strong results.
   Submitted 13 September, 2024; v1 submitted 5 August, 2024; originally announced August 2024.

9. arXiv:2406.08918 [pdf, other] (cs.CR, cs.AI, cs.LG, math.ST, stat.ML)
   Beyond the Calibration Point: Mechanism Comparison in Differential Privacy
   Authors: Georgios Kaissis, Stefan Kolek, Borja Balle, Jamie Hayes, Daniel Rueckert
   Abstract: In differentially private (DP) machine learning, the privacy guarantees of DP mechanisms are often reported and compared on the basis of a single $(\varepsilon, \delta)$-pair. This practice overlooks that DP guarantees can vary substantially even between mechanisms sharing a given $(\varepsilon, \delta)$, and potentially introduces privacy vulnerabilities which can remain undetected.
   This motivates the need for robust, rigorous methods for comparing DP guarantees in such cases. Here, we introduce the $\Delta$-divergence between mechanisms which quantifies the worst-case excess privacy vulnerability of choosing one mechanism over another in terms of $(\varepsilon, \delta)$, $f$-DP and in terms of a newly presented Bayesian interpretation. Moreover, as a generalisation of the Blackwell theorem, it is endowed with strong decision-theoretic foundations. Through application examples, we show that our techniques can facilitate informed decision-making and reveal gaps in the current understanding of privacy risks, as current practices in DP-SGD often result in choosing mechanisms with high excess privacy vulnerabilities.
   Submitted 10 July, 2024; v1 submitted 13 June, 2024; originally announced June 2024.
   Comments: ICML 2024
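   For reference, the single pair the abstract refers to comes from the standard definition of approximate DP: a mechanism $M$ is $(\varepsilon, \delta)$-DP if for all neighbouring datasets $D, D'$ and all measurable sets $S$,

   $$\Pr[M(D) \in S] \;\le\; e^{\varepsilon}\,\Pr[M(D') \in S] + \delta.$$

   Two mechanisms can satisfy this inequality at the same $(\varepsilon, \delta)$ yet behave very differently across the rest of their privacy profiles, which is precisely the gap the $\Delta$-divergence is designed to quantify.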
10. arXiv:2405.05175 [pdf, other] (cs.CR, cs.CL, cs.LG)
    AirGapAgent: Protecting Privacy-Conscious Conversational Agents
    Authors: Eugene Bagdasarian, Ren Yi, Sahra Ghalebikesabi, Peter Kairouz, Marco Gruteser, Sewoong Oh, Borja Balle, Daniel Ramage
    Abstract: The growing use of large language model (LLM)-based conversational agents to manage sensitive user data raises significant privacy concerns. While these agents excel at understanding and acting on context, this capability can be exploited by malicious actors. We introduce a novel threat model where adversarial third-party apps manipulate the context of interaction to trick LLM-based agents into revealing private information not relevant to the task at hand. Grounded in the framework of contextual integrity, we introduce AirGapAgent, a privacy-conscious agent designed to prevent unintended data leakage by restricting the agent's access to only the data necessary for a specific task. Extensive experiments using Gemini, GPT, and Mistral models as agents validate our approach's effectiveness in mitigating this form of context hijacking while maintaining core agent functionality. For example, we show that a single-query context hijacking attack on a Gemini Ultra agent reduces its ability to protect user data from 94% to 45%, while an AirGapAgent achieves 97% protection, rendering the same attack ineffective.
    Submitted 18 September, 2024; v1 submitted 8 May, 2024; originally announced May 2024.
    Comments: at CCS'24
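    The "air gap" can be pictured as a data-minimization gate sitting outside the conversation: a separate step decides which profile fields the agent may see for the stated task, so a hijacked context cannot reach the rest of the profile. The field names and task map below are hypothetical, and a real agent would make the minimization decision dynamically rather than from a fixed table.

```python
# Hypothetical sketch of the data-minimization gate behind an "air gap": the
# conversational agent only ever receives fields pre-approved for the task,
# so context-hijacking prompts cannot exfiltrate the rest of the profile.
USER_PROFILE = {"name": "Alice", "allergies": "penicillin",
                "ssn": "***", "salary": "***"}

TASK_RELEVANT_FIELDS = {
    "book_doctor_appointment": {"name", "allergies"},
    "restaurant_reservation": {"name"},
}

def minimized_context(task: str) -> dict:
    allowed = TASK_RELEVANT_FIELDS.get(task, set())
    return {k: v for k, v in USER_PROFILE.items() if k in allowed}

# The agent is instantiated with this minimized view, never the full profile.
print(minimized_context("restaurant_reservation"))  # {'name': 'Alice'}
```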
</p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">at CCS'24</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2404.16244">arXiv:2404.16244</a> <span> [<a href="https://arxiv.org/pdf/2404.16244">pdf</a>, <a href="https://arxiv.org/format/2404.16244">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computers and Society">cs.CY</span> </div> </div> <p class="title is-5 mathjax"> The Ethics of Advanced AI Assistants </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Gabriel%2C+I">Iason Gabriel</a>, <a href="/search/cs?searchtype=author&query=Manzini%2C+A">Arianna Manzini</a>, <a href="/search/cs?searchtype=author&query=Keeling%2C+G">Geoff Keeling</a>, <a href="/search/cs?searchtype=author&query=Hendricks%2C+L+A">Lisa Anne Hendricks</a>, <a href="/search/cs?searchtype=author&query=Rieser%2C+V">Verena Rieser</a>, <a href="/search/cs?searchtype=author&query=Iqbal%2C+H">Hasan Iqbal</a>, <a href="/search/cs?searchtype=author&query=Toma%C5%A1ev%2C+N">Nenad Toma拧ev</a>, <a href="/search/cs?searchtype=author&query=Ktena%2C+I">Ira Ktena</a>, <a href="/search/cs?searchtype=author&query=Kenton%2C+Z">Zachary Kenton</a>, <a href="/search/cs?searchtype=author&query=Rodriguez%2C+M">Mikel Rodriguez</a>, <a href="/search/cs?searchtype=author&query=El-Sayed%2C+S">Seliem El-Sayed</a>, <a href="/search/cs?searchtype=author&query=Brown%2C+S">Sasha Brown</a>, <a href="/search/cs?searchtype=author&query=Akbulut%2C+C">Canfer Akbulut</a>, <a href="/search/cs?searchtype=author&query=Trask%2C+A">Andrew Trask</a>, <a href="/search/cs?searchtype=author&query=Hughes%2C+E">Edward Hughes</a>, <a href="/search/cs?searchtype=author&query=Bergman%2C+A+S">A. Stevie Bergman</a>, <a href="/search/cs?searchtype=author&query=Shelby%2C+R">Renee Shelby</a>, <a href="/search/cs?searchtype=author&query=Marchal%2C+N">Nahema Marchal</a>, <a href="/search/cs?searchtype=author&query=Griffin%2C+C">Conor Griffin</a>, <a href="/search/cs?searchtype=author&query=Mateos-Garcia%2C+J">Juan Mateos-Garcia</a>, <a href="/search/cs?searchtype=author&query=Weidinger%2C+L">Laura Weidinger</a>, <a href="/search/cs?searchtype=author&query=Street%2C+W">Winnie Street</a>, <a href="/search/cs?searchtype=author&query=Lange%2C+B">Benjamin Lange</a>, <a href="/search/cs?searchtype=author&query=Ingerman%2C+A">Alex Ingerman</a>, <a href="/search/cs?searchtype=author&query=Lentz%2C+A">Alison Lentz</a> , et al. (32 additional authors not shown) </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2404.16244v2-abstract-short" style="display: inline;"> This paper focuses on the opportunities and the ethical and societal risks posed by advanced AI assistants. We define advanced AI assistants as artificial agents with natural language interfaces, whose function is to plan and execute sequences of actions on behalf of a user, across one or more domains, in line with the user's expectations. 
    The paper starts by considering the technology itself, providing an overview of AI assistants, their technical foundations and potential range of applications. It then explores questions around AI value alignment, well-being, safety and malicious uses. Extending the circle of inquiry further, we next consider the relationship between advanced AI assistants and individual users in more detail, exploring topics such as manipulation and persuasion, anthropomorphism, appropriate relationships, trust and privacy. With this analysis in place, we consider the deployment of advanced assistants at a societal scale, focusing on cooperation, equity and access, misinformation, economic impact, the environment and how best to evaluate advanced AI assistants. Finally, we conclude by providing a range of recommendations for researchers, developers, policymakers and public stakeholders.
    Submitted 28 April, 2024; v1 submitted 24 April, 2024; originally announced April 2024.
Their output is obtained by adding noise to a sequence of low-sensitivity queries and reporting the identity of the query whose (noisy) answer satisfies a certain condition. Pure DP guarantees for these mechanisms are easy to obtain when Laplace noise is added to the queries. On the other hand, when instantiated using Gaussian noise, standard analyses only yield approximate DP guarantees despite the fact that the outputs of these mechanisms lie in a discrete space. In this work, we revisit the analysis of Report Noisy Max and Above Threshold with Gaussian noise and show that, under the additional assumption that the underlying queries are bounded, it is possible to provide pure ex-ante DP bounds for Report Noisy Max and pure ex-post DP bounds for Above Threshold. The resulting bounds are tight and depend on closed-form expressions that can be numerically evaluated using standard methods. Empirically we find these lead to tighter privacy accounting in the high privacy, low data regime. Further, we propose a simple privacy filter for composing pure ex-post DP guarantees, and use it to derive a fully adaptive Gaussian Sparse Vector Technique mechanism. Finally, we provide experiments on mobility and energy consumption datasets demonstrating that our Sparse Vector Technique is practically competitive with previous approaches and requires less hyper-parameter tuning.
Submitted 21 March, 2024; v1 submitted 8 February, 2024; originally announced February 2024.
Comments: AISTATS 2024
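For readers unfamiliar with the two mechanisms named in this abstract, here is a minimal sketch of Report Noisy Max and Above Threshold instantiated with Gaussian noise (standard textbook forms, not the paper's new analysis; the noise scale sigma is an illustrative placeholder rather than a calibrated privacy parameter):

```python
import numpy as np

rng = np.random.default_rng(0)

def report_noisy_max(queries, sigma):
    """Return the index of the query with the largest Gaussian-noised answer."""
    noisy = np.asarray(queries, dtype=float) + rng.normal(0.0, sigma, len(queries))
    return int(np.argmax(noisy))

def above_threshold(queries, threshold, sigma):
    """Return the index of the first query whose noisy answer
    exceeds a (noisy) threshold, or None if none does."""
    noisy_threshold = threshold + rng.normal(0.0, sigma)
    for i, q in enumerate(queries):
        if q + rng.normal(0.0, sigma) >= noisy_threshold:
            return i
    return None

# Example: privately select the (approximately) largest of three bounded counts.
print(report_noisy_max([10.0, 12.0, 11.0], sigma=2.0))
print(above_threshold([10.0, 12.0, 11.0], threshold=11.5, sigma=2.0))
```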
arXiv:2308.10888 [pdf, other] cs.LG cs.CV cs.CY
Unlocking Accuracy and Fairness in Differentially Private Image Classification
Authors: Leonard Berrada, Soham De, Judy Hanwen Shen, Jamie Hayes, Robert Stanforth, David Stutz, Pushmeet Kohli, Samuel L. Smith, Borja Balle
Abstract: Privacy-preserving machine learning aims to train models on private data without leaking sensitive information. Differential privacy (DP) is considered the gold standard framework for privacy-preserving training, as it provides formal privacy guarantees. However, compared to their non-private counterparts, models trained with DP often have significantly reduced accuracy. Private classifiers are also believed to exhibit larger performance disparities across subpopulations, raising fairness concerns. The poor performance of classifiers trained with DP has prevented the widespread adoption of privacy-preserving machine learning in industry.
Here we show that pre-trained foundation models fine-tuned with DP can achieve similar accuracy to non-private classifiers, even in the presence of significant distribution shifts between pre-training data and downstream tasks. We achieve private accuracies within a few percent of the non-private state of the art across four datasets, including two medical imaging benchmarks. Furthermore, our private medical classifiers do not exhibit larger performance disparities across demographic groups than non-private models. This milestone towards making DP training a practical and reliable technology has the potential to enable machine learning practitioners to train safely on sensitive datasets while protecting individuals' privacy.
Submitted 21 August, 2023; originally announced August 2023.
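As an illustration of the DP fine-tuning workflow this abstract refers to (a toy sketch, not the authors' code: the model, data, and hyper-parameters below are placeholder assumptions, and the real setting fine-tunes a large pre-trained backbone), the Opacus library wraps a standard PyTorch training loop with per-example clipping and noise:

```python
import torch
from torch.utils.data import DataLoader, TensorDataset
from opacus import PrivacyEngine

# Toy stand-in for a (pre-trained) classifier; in the paper's setting this
# would be a foundation model with a freshly initialized classification head.
model = torch.nn.Sequential(torch.nn.Flatten(), torch.nn.Linear(28 * 28, 10))
optimizer = torch.optim.SGD(model.parameters(), lr=0.1)
data = TensorDataset(torch.randn(64, 1, 28, 28), torch.randint(0, 10, (64,)))
train_loader = DataLoader(data, batch_size=16)

privacy_engine = PrivacyEngine()
model, optimizer, train_loader = privacy_engine.make_private(
    module=model,
    optimizer=optimizer,
    data_loader=train_loader,
    noise_multiplier=1.0,  # illustrative; chosen to meet an (eps, delta) target
    max_grad_norm=1.0,     # per-example gradient clipping bound
)

criterion = torch.nn.CrossEntropyLoss()
for x, y in train_loader:
    optimizer.zero_grad()
    loss = criterion(model(x), y)
    loss.backward()   # per-example gradients are captured here
    optimizer.step()  # the DP optimizer clips and noises them before updating
```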
arXiv:2306.00135 [pdf, other] cs.FL, doi: 10.1017/S0960129524000276
Optimal Approximate Minimization of One-Letter Weighted Finite Automata
Authors: Clara Lacroce, Borja Balle, Prakash Panangaden, Guillaume Rabusseau
Abstract: In this paper, we study the approximate minimization problem of weighted finite automata (WFAs): to compute the best possible approximation of a WFA given a bound on the number of states. By reformulating the problem in terms of Hankel matrices, we leverage classical results on the approximation of Hankel operators, namely the celebrated Adamyan-Arov-Krein (AAK) theory. We solve the optimal spectral-norm approximate minimization problem for irredundant WFAs with real weights, defined over a one-letter alphabet. We present a theoretical analysis based on AAK theory, and bounds on the quality of the approximation in the spectral norm and $\ell^2$ norm. Moreover, we provide a closed-form solution, and an algorithm, to compute the optimal approximation of a given size in polynomial time.
Submitted 31 May, 2023; originally announced June 2023.
Comments: 32 pages. arXiv admin note: substantial text overlap with arXiv:2102.06860
Journal ref: Math. Struct. Comp. Sci. 34 (2024) 807-833
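For intuition about the Hankel-matrix reformulation mentioned above, the following toy sketch (an illustration under assumed parameters, not the paper's algorithm, which computes the AAK-optimal approximant) builds a finite section of the Hankel matrix of a one-letter weighted automaton's series and inspects its singular values, which govern the achievable spectral-norm approximation error:

```python
import numpy as np

# Toy weighted automaton over a one-letter alphabet: f(n) = alpha @ A^n @ beta.
alpha = np.array([1.0, 0.0])
A = np.array([[0.5, 0.2],
              [0.0, 0.3]])
beta = np.array([1.0, 1.0])

def series(n):
    return alpha @ np.linalg.matrix_power(A, n) @ beta

# Finite section of the Hankel matrix H[i, j] = f(i + j).
N = 8
H = np.array([[series(i + j) for j in range(N)] for i in range(N)])

# A rank-k truncation of H achieves spectral-norm error sigma_{k+1};
# these singular values are the quantities the AAK-style bounds control.
sigmas = np.linalg.svd(H, compute_uv=False)
print("Hankel singular values:", np.round(sigmas, 6))
```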
arXiv:2305.10867 [pdf, other] cs.CR
Amplification by Shuffling without Shuffling
Authors: Borja Balle, James Bell, Adrià Gascón
Abstract: Motivated by recent developments in the shuffle model of differential privacy, we propose a new approximate shuffling functionality called Alternating Shuffle, and provide a protocol implementing alternating shuffling in a single-server threat model where the adversary observes all communication. Unlike previous shuffling protocols in this threat model, the per-client communication of our protocol only grows sub-linearly in the number of clients. Moreover, we study the concrete efficiency of our protocol and show it can improve per-client communication by one or more orders of magnitude with respect to previous (approximate) shuffling protocols. We also show a differential privacy amplification result for alternating shuffling analogous to the one for uniform shuffling, and demonstrate that shuffling-based protocols for secure summation based on a construction of Ishai et al. (FOCS'06) remain secure under the Alternating Shuffle. In the process, we also develop a protocol for exact shuffling in the single-server threat model with amortized logarithmic per-client communication, which might be of independent interest.
Submitted 7 September, 2023; v1 submitted 18 May, 2023; originally announced May 2023.
Journal ref: CCS 2023
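As background for the shuffle model this abstract builds on (a generic illustration of uniform shuffling with randomized response, not the Alternating Shuffle protocol itself; the local budget eps0 is an assumed parameter), each client randomizes its own value and the shuffler discards ordering before the analyzer sees anything:

```python
import numpy as np

rng = np.random.default_rng(0)

def local_randomizer(bit, eps0):
    """eps0-DP randomized response on a single private bit."""
    p_keep = np.exp(eps0) / (np.exp(eps0) + 1.0)
    return bit if rng.random() < p_keep else 1 - bit

def shuffle_model_round(bits, eps0):
    """Each client randomizes locally; the shuffler erases who sent what."""
    reports = [local_randomizer(int(b), eps0) for b in bits]
    rng.shuffle(reports)  # the analyzer only sees the anonymized multiset
    return reports

# Example: estimate the sum of 1000 private bits from shuffled reports.
bits = rng.integers(0, 2, size=1000)
reports = shuffle_model_round(bits, eps0=1.0)
p = np.exp(1.0) / (np.exp(1.0) + 1.0)
estimate = (sum(reports) - len(bits) * (1 - p)) / (2 * p - 1)  # debiasing
print("true sum:", bits.sum(), "estimate:", round(estimate, 1))
```

Amplification-by-shuffling results state that the shuffled collection of such local reports satisfies a much stronger central DP guarantee than each eps0-DP report does on its own.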
arXiv:2302.13861 [pdf, other] cs.LG cs.CR cs.CV stat.ML
Differentially Private Diffusion Models Generate Useful Synthetic Images
Authors: Sahra Ghalebikesabi, Leonard Berrada, Sven Gowal, Ira Ktena, Robert Stanforth, Jamie Hayes, Soham De, Samuel L. Smith, Olivia Wiles, Borja Balle
Abstract: The ability to generate privacy-preserving synthetic versions of sensitive image datasets could unlock numerous ML applications currently constrained by data availability. Due to their astonishing image generation quality, diffusion models are a prime candidate for generating high-quality synthetic data. However, recent studies have found that, by default, the outputs of some diffusion models do not preserve training data privacy. By privately fine-tuning ImageNet pre-trained diffusion models with more than 80M parameters, we obtain SOTA results on CIFAR-10 and Camelyon17 in terms of both FID and the accuracy of downstream classifiers trained on synthetic data.
We decrease the SOTA FID on CIFAR-10 from 26.2 to 9.8, and increase the downstream accuracy from 51.0% to 88.0%. On synthetic data from Camelyon17, we achieve a downstream accuracy of 91.1%, which is close to the SOTA of 96.5% obtained when training on the real data. We leverage the ability of generative models to create infinite amounts of data to maximise the downstream prediction performance, and further show how to use synthetic data for hyperparameter tuning. Our results demonstrate that diffusion models fine-tuned with differential privacy can produce useful and provably private synthetic data, even in applications with significant distribution shift between the pre-training and fine-tuning distributions.
Submitted 27 February, 2023; originally announced February 2023.

arXiv:2302.07956 [pdf, other] cs.LG cs.CR
Tight Auditing of Differentially Private Machine Learning
Authors: Milad Nasr, Jamie Hayes, Thomas Steinke, Borja Balle, Florian Tramèr, Matthew Jagielski, Nicholas Carlini, Andreas Terzis
Abstract: Auditing mechanisms for differential privacy use probabilistic means to empirically estimate the privacy level of an algorithm. For private machine learning, existing auditing mechanisms are tight: the empirical privacy estimate (nearly) matches the algorithm's provable privacy guarantee. But these auditing techniques suffer from two limitations.
First, they only give tight estimates under implausible worst-case assumptions (e.g., a fully adversarial dataset). Second, they require thousands or millions of training runs to produce non-trivial statistical estimates of the privacy leakage. This work addresses both issues. We design an improved auditing scheme that yields tight privacy estimates for natural (not adversarially crafted) datasets -- if the adversary can see all model updates during training. Prior auditing works rely on the same assumption, which is permitted under the standard differential privacy threat model. This threat model is also applicable, e.g., in federated learning settings. Moreover, our auditing scheme requires only two training runs (instead of thousands) to produce tight privacy estimates, by adapting recent advances in tight composition theorems for differential privacy. We demonstrate the utility of our improved auditing schemes by surfacing implementation bugs in private machine learning code that eluded prior auditing techniques.
Submitted 15 February, 2023; originally announced February 2023.
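A minimal sketch of the general auditing idea described in this abstract (the classical many-runs, black-box recipe rather than the paper's two-run scheme; the mechanism, threshold, and run count are illustrative assumptions): run the mechanism repeatedly on adjacent inputs, build a distinguishing test, and convert its error rates into an empirical lower bound on epsilon:

```python
import numpy as np

rng = np.random.default_rng(1)

def mechanism(dataset_sum, sigma=1.0):
    """Toy sensitivity-1 mechanism: a Gaussian-noised sum."""
    return dataset_sum + rng.normal(0.0, sigma)

# Adjacent inputs differing in one record (sums 0 vs 1).
n_runs, thresh = 100_000, 0.5
out0 = np.array([mechanism(0.0) for _ in range(n_runs)])
out1 = np.array([mechanism(1.0) for _ in range(n_runs)])

# Distinguisher: guess "record present" when the output exceeds a threshold.
fpr = np.mean(out0 > thresh)  # false positives on D
tpr = np.mean(out1 > thresh)  # true positives on D'

# A pure eps-DP mechanism would force TPR <= e^eps * FPR, so the log-ratio
# gives an empirical epsilon estimate (a rigorous audit would additionally
# wrap these frequencies in confidence intervals).
print(f"empirical epsilon estimate ~ {np.log(tpr / fpr):.2f}")
```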
arXiv:2302.07225 [pdf, other] cs.CR cs.LG
Bounding Training Data Reconstruction in DP-SGD
Authors: Jamie Hayes, Saeed Mahloujifar, Borja Balle
Abstract: Differentially private training offers a protection which is usually interpreted as a guarantee against membership inference attacks. By proxy, this guarantee extends to other threats like reconstruction attacks attempting to extract complete training examples. Recent works provide evidence that if one does not need to protect against membership attacks but instead only wants to protect against training data reconstruction, then the utility of private models can be improved because less noise is required to protect against these more ambitious attacks. We investigate this further in the context of DP-SGD, a standard algorithm for private deep learning, and provide an upper bound on the success of any reconstruction attack against DP-SGD together with an attack that empirically matches the predictions of our bound. Together, these two results open the door to fine-grained investigations on how to set the privacy parameters of DP-SGD in practice to protect against reconstruction attacks. Finally, we use our methods to demonstrate that different settings of the DP-SGD parameters leading to the same DP guarantees can result in significantly different success rates for reconstruction, indicating that the DP guarantee alone might not be a good proxy for controlling the protection against reconstruction attacks.
Submitted 30 October, 2023; v1 submitted 14 February, 2023; originally announced February 2023.
Comments: New experiments and comparison with related work
arXiv:2301.13188 [pdf, other] cs.CR cs.CV cs.LG
Extracting Training Data from Diffusion Models
Authors: Nicholas Carlini, Jamie Hayes, Milad Nasr, Matthew Jagielski, Vikash Sehwag, Florian Tramèr, Borja Balle, Daphne Ippolito, Eric Wallace
Abstract: Image diffusion models such as DALL-E 2, Imagen, and Stable Diffusion have attracted significant attention due to their ability to generate high-quality synthetic images. In this work, we show that diffusion models memorize individual images from their training data and emit them at generation time. With a generate-and-filter pipeline, we extract over a thousand training examples from state-of-the-art models, ranging from photographs of individual people to trademarked company logos. We also train hundreds of diffusion models in various settings to analyze how different modeling and data decisions affect privacy. Overall, our results show that diffusion models are much less private than prior generative models such as GANs, and that mitigating these vulnerabilities may require new advances in privacy-preserving training.
Submitted 30 January, 2023; originally announced January 2023.
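A rough sketch in the spirit of the generate-and-filter pipeline named above (the generator, distance metric, and threshold here are placeholder assumptions, not the paper's concrete choices): sample many generations and flag near-duplicate clusters, since memorized training images tend to be re-emitted almost verbatim across independent samples:

```python
import numpy as np

rng = np.random.default_rng(2)

def generate_samples(n, dim=64):
    """Placeholder for sampling from a trained diffusion model."""
    samples = rng.normal(size=(n, dim))
    samples[::50] = samples[0]  # simulate one memorized image re-emitted often
    return samples

def find_candidate_memorizations(samples, radius=1e-6):
    """Flag samples whose nearest neighbor among the other
    generations is unusually close (a near-duplicate)."""
    flagged = []
    for i, s in enumerate(samples):
        dists = np.linalg.norm(samples - s, axis=1)
        dists[i] = np.inf  # ignore self-distance
        if dists.min() < radius:
            flagged.append(i)
    return flagged

samples = generate_samples(500)
print("candidate memorized generations:", len(find_candidate_memorizations(samples)))
```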
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2301.13188v1-abstract-full').style.display = 'none'; document.getElementById('2301.13188v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 30 January, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> January 2023. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2204.13650">arXiv:2204.13650</a> <span> [<a href="https://arxiv.org/pdf/2204.13650">pdf</a>, <a href="https://arxiv.org/format/2204.13650">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Cryptography and Security">cs.CR</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">stat.ML</span> </div> </div> <p class="title is-5 mathjax"> Unlocking High-Accuracy Differentially Private Image Classification through Scale </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=De%2C+S">Soham De</a>, <a href="/search/cs?searchtype=author&query=Berrada%2C+L">Leonard Berrada</a>, <a href="/search/cs?searchtype=author&query=Hayes%2C+J">Jamie Hayes</a>, <a href="/search/cs?searchtype=author&query=Smith%2C+S+L">Samuel L. Smith</a>, <a href="/search/cs?searchtype=author&query=Balle%2C+B">Borja Balle</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2204.13650v2-abstract-short" style="display: inline;"> Differential Privacy (DP) provides a formal privacy guarantee preventing adversaries with access to a machine learning model from extracting information about individual training points. Differentially Private Stochastic Gradient Descent (DP-SGD), the most popular DP training method for deep learning, realizes this protection by injecting noise during training. However previous works have found th… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2204.13650v2-abstract-full').style.display = 'inline'; document.getElementById('2204.13650v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2204.13650v2-abstract-full" style="display: none;"> Differential Privacy (DP) provides a formal privacy guarantee preventing adversaries with access to a machine learning model from extracting information about individual training points. Differentially Private Stochastic Gradient Descent (DP-SGD), the most popular DP training method for deep learning, realizes this protection by injecting noise during training. However previous works have found that DP-SGD often leads to a significant degradation in performance on standard image classification benchmarks. Furthermore, some authors have postulated that DP-SGD inherently performs poorly on large models, since the norm of the noise required to preserve privacy is proportional to the model dimension. 
In contrast, we demonstrate that DP-SGD on over-parameterized models can perform significantly better than previously thought. Combining careful hyper-parameter tuning with simple techniques to ensure signal propagation and improve the convergence rate, we obtain a new SOTA on CIFAR-10, without extra data, of 81.4% under $(8, 10^{-5})$-DP using a 40-layer Wide-ResNet, improving over the previous SOTA of 71.7%. When fine-tuning a pre-trained NFNet-F3, we achieve a remarkable 83.8% top-1 accuracy on ImageNet under $(0.5, 8 \cdot 10^{-7})$-DP. Additionally, we achieve 86.7% top-1 accuracy under $(8, 8 \cdot 10^{-7})$-DP, which is just 4.3% below the current non-private SOTA for this task. We believe our results are a significant step towards closing the accuracy gap between private and non-private image classification.
Submitted 16 June, 2022; v1 submitted 28 April, 2022; originally announced April 2022.
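For reference, the core DP-SGD update that the noise-injection discussion above refers to can be sketched in a few lines (a bare-bones numpy illustration with assumed constants, not the paper's tuned training setup): clip each per-example gradient to norm C, sum, add Gaussian noise with scale proportional to C, and average:

```python
import numpy as np

rng = np.random.default_rng(3)

def dp_sgd_step(params, per_example_grads, lr=0.1, clip_norm=1.0, noise_mult=1.0):
    """One DP-SGD update computed from a batch of per-example gradients."""
    clipped = []
    for g in per_example_grads:
        scale = min(1.0, clip_norm / (np.linalg.norm(g) + 1e-12))
        clipped.append(g * scale)  # bound each example's influence by clip_norm
    noise = rng.normal(0.0, noise_mult * clip_norm, size=params.shape)
    noisy_mean = (np.sum(clipped, axis=0) + noise) / len(clipped)
    return params - lr * noisy_mean

# Toy usage: 8 per-example gradients for a 5-dimensional parameter vector.
params = np.zeros(5)
grads = rng.normal(size=(8, 5))
print(dp_sgd_step(params, grads))
```

Note how the noise norm scales with the parameter dimension, which is exactly the intuition behind the "large models should fare worse" postulate that this paper pushes back on.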
arXiv:2201.04845 [pdf, other] cs.CR cs.LG
Reconstructing Training Data with Informed Adversaries
Authors: Borja Balle, Giovanni Cherubin, Jamie Hayes
Abstract: Given access to a machine learning model, can an adversary reconstruct the model's training data? This work studies this question from the lens of a powerful informed adversary who knows all the training data points except one. By instantiating concrete attacks, we show it is feasible to reconstruct the remaining data point in this stringent threat model. For convex models (e.g. logistic regression), reconstruction attacks are simple and can be derived in closed form. For more general models (e.g. neural networks), we propose an attack strategy based on training a reconstructor network that receives as input the weights of the model under attack and produces as output the target data point. We demonstrate the effectiveness of our attack on image classifiers trained on MNIST and CIFAR-10, and systematically investigate which factors of standard machine learning pipelines affect reconstruction success. Finally, we theoretically investigate what amount of differential privacy suffices to mitigate reconstruction attacks by informed adversaries. Our work provides an effective reconstruction attack that model developers can use to assess memorization of individual points in general settings beyond those considered in previous works (e.g. generative language models or access to training gradients); it shows that standard models have the capacity to store enough information to enable high-fidelity reconstruction of training data points; and it demonstrates that differential privacy can successfully mitigate such attacks in a parameter regime where utility degradation is minimal.
Submitted 25 April, 2022; v1 submitted 13 January, 2022; originally announced January 2022.
Comments: Published at "2022 IEEE Symposium on Security and Privacy (SP)"
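To make the informed-adversary setting concrete, here is the simplest possible closed-form instance (a deliberately trivial toy, far simpler than the paper's convex-model attacks): when the released "model" is an exact empirical mean, knowledge of the other n-1 points pins down the remaining one exactly:

```python
import numpy as np

rng = np.random.default_rng(4)

# Private dataset of n points; the released "model" is their exact mean.
data = rng.normal(size=(10, 3))
model = data.mean(axis=0)

# Informed adversary: knows every point except the last one.
known = data[:-1]
n = len(data)
reconstructed = n * model - known.sum(axis=0)

print("target:       ", np.round(data[-1], 4))
print("reconstructed:", np.round(reconstructed, 4))  # matches exactly
```

Adding calibrated noise to the released statistic (i.e., making it differentially private) is precisely what breaks this exact recovery, which is the mitigation the abstract quantifies.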
arXiv:2201.02265 [pdf, other] cs.LG
Learning to be adversarially robust and differentially private
Authors: Jamie Hayes, Borja Balle, M. Pawan Kumar
Abstract: We study the difficulties in learning that arise from robust and differentially private optimization. We first study convergence of gradient descent based adversarial training with differential privacy, taking a simple binary classification task on linearly separable data as an illustrative example. We compare the gap between adversarial and nominal risk in both private and non-private settings, showing that the data-dimensionality-dependent term introduced by private optimization compounds the difficulties of learning a robust model. After this, we discuss what parts of adversarial training and differential privacy hurt optimization, identifying that the size of the adversarial perturbation and the clipping norm in differential privacy both increase the curvature of the loss landscape, implying poorer generalization performance.
Submitted 6 January, 2022; originally announced January 2022.
Comments: Preliminary work appeared at PPML 2021

arXiv:2112.04359 [pdf, other] cs.CL cs.AI cs.CY
Ethical and social risks of harm from Language Models
Authors: Laura Weidinger, John Mellor, Maribeth Rauh, Conor Griffin, Jonathan Uesato, Po-Sen Huang, Myra Cheng, Mia Glaese, Borja Balle, Atoosa Kasirzadeh,
href="/search/cs?searchtype=author&query=Kenton%2C+Z">Zac Kenton</a>, <a href="/search/cs?searchtype=author&query=Brown%2C+S">Sasha Brown</a>, <a href="/search/cs?searchtype=author&query=Hawkins%2C+W">Will Hawkins</a>, <a href="/search/cs?searchtype=author&query=Stepleton%2C+T">Tom Stepleton</a>, <a href="/search/cs?searchtype=author&query=Biles%2C+C">Courtney Biles</a>, <a href="/search/cs?searchtype=author&query=Birhane%2C+A">Abeba Birhane</a>, <a href="/search/cs?searchtype=author&query=Haas%2C+J">Julia Haas</a>, <a href="/search/cs?searchtype=author&query=Rimell%2C+L">Laura Rimell</a>, <a href="/search/cs?searchtype=author&query=Hendricks%2C+L+A">Lisa Anne Hendricks</a>, <a href="/search/cs?searchtype=author&query=Isaac%2C+W">William Isaac</a>, <a href="/search/cs?searchtype=author&query=Legassick%2C+S">Sean Legassick</a>, <a href="/search/cs?searchtype=author&query=Irving%2C+G">Geoffrey Irving</a>, <a href="/search/cs?searchtype=author&query=Gabriel%2C+I">Iason Gabriel</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2112.04359v1-abstract-short" style="display: inline;"> This paper aims to help structure the risk landscape associated with large-scale Language Models (LMs). In order to foster advances in responsible innovation, an in-depth understanding of the potential risks posed by these models is needed. A wide range of established and anticipated risks are analysed in detail, drawing on multidisciplinary expertise and literature from computer science, linguist… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2112.04359v1-abstract-full').style.display = 'inline'; document.getElementById('2112.04359v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2112.04359v1-abstract-full" style="display: none;"> This paper aims to help structure the risk landscape associated with large-scale Language Models (LMs). In order to foster advances in responsible innovation, an in-depth understanding of the potential risks posed by these models is needed. A wide range of established and anticipated risks are analysed in detail, drawing on multidisciplinary expertise and literature from computer science, linguistics, and social sciences. We outline six specific risk areas: I. Discrimination, Exclusion and Toxicity, II. Information Hazards, III. Misinformation Harms, V. Malicious Uses, V. Human-Computer Interaction Harms, VI. Automation, Access, and Environmental Harms. The first area concerns the perpetuation of stereotypes, unfair discrimination, exclusionary norms, toxic language, and lower performance by social group for LMs. The second focuses on risks from private data leaks or LMs correctly inferring sensitive information. The third addresses risks arising from poor, false or misleading information including in sensitive domains, and knock-on risks such as the erosion of trust in shared information. The fourth considers risks from actors who try to use LMs to cause harm. The fifth focuses on risks specific to LLMs used to underpin conversational agents that interact with human users, including unsafe use, manipulation or deception. The sixth discusses the risk of environmental harm, job automation, and other challenges that may have a disparate effect on different social groups or communities. In total, we review 21 risks in-depth. 
We discuss the points of origin of different risks and point to potential mitigation approaches. Lastly, we discuss organisational responsibilities in implementing mitigations, and the role of collaboration and participation. We highlight directions for further research, particularly on expanding the toolkit for assessing and evaluating the outlined risks in LMs.
Submitted 8 December, 2021; originally announced December 2021.

arXiv:2102.08093 stat.ML cs.LG
A Law of Robustness for Weight-bounded Neural Networks
Authors: Hisham Husain, Borja Balle
Abstract: Robustness of deep neural networks against adversarial perturbations is a pressing concern motivated by recent findings showing the pervasive nature of such vulnerabilities. One method of characterizing the robustness of a neural network model is through its Lipschitz constant, which forms a robustness certificate. A natural question to ask is: for a fixed model class (such as neural networks) and a dataset of size $n$, what is the smallest achievable Lipschitz constant among all models that fit the dataset? Recently, Bubeck et al. (2020) conjectured that when using two-layer networks with $k$ neurons to fit a generic dataset, the smallest Lipschitz constant is $\Omega(\sqrt{\frac{n}{k}})$. This implies that one would require one neuron per data point to robustly fit the data. In this work we derive a lower bound on the Lipschitz constant for any arbitrary model class with bounded Rademacher complexity.
Our result coincides with that conjectured in Bubeck et al. (2020) for two-layer networks under the assumption of bounded weights. However, due to our result's generality, we also derive bounds for multi-layer neural networks, discovering that one requires $\log n$ constant-sized layers to robustly fit the data. Thus, our work establishes a law of robustness for weight-bounded neural networks and provides formal evidence on the necessity of over-parametrization in deep learning.
Submitted 12 March, 2021; v1 submitted 16 February, 2021; originally announced February 2021.
Comments: The main result does not resolve the conjecture as claimed. However, the proof technique can be used to obtain a weaker result. The manuscript will be updated at a later date.

arXiv:2102.06860 [pdf, ps, other] cs.FL
Optimal Spectral-Norm Approximate Minimization of Weighted Finite Automata
Authors: Borja Balle, Clara Lacroce, Prakash Panangaden, Doina Precup, Guillaume Rabusseau
Abstract: We address the approximate minimization problem for weighted finite automata (WFAs) with weights in $\mathbb{R}$, over a one-letter alphabet: to compute the best possible approximation of a WFA given a bound on the number of states. This work is grounded in Adamyan-Arov-Krein approximation theory, a remarkable collection of results on the approximation of Hankel operators.
In addition to its intrinsic mathematical relevance, this theory has proven to be very effective for model reduction. We adapt these results to the framework of weighted automata over a one-letter alphabet. We provide theoretical guarantees and bounds on the quality of the approximation in the spectral and $\ell^2$ norm. We develop an algorithm that, based on the properties of Hankel operators, returns the optimal approximation in the spectral norm.
Submitted 17 May, 2021; v1 submitted 12 February, 2021; originally announced February 2021.
Comments: Full version of ICALP 2021 paper; authors are listed in alphabetical order

arXiv:2009.09052 [pdf, ps, other] cs.LG cs.CR stat.ML
Private Reinforcement Learning with PAC and Regret Guarantees
Authors: Giuseppe Vietri, Borja Balle, Akshay Krishnamurthy, Zhiwei Steven Wu
Abstract: Motivated by high-stakes decision-making domains like personalized medicine where user information is inherently sensitive, we design privacy-preserving exploration policies for episodic reinforcement learning (RL).
preserving exploration policies for episodic reinforcement learning (RL). We first provide a meaningful privacy formulation using the notion of joint differential privacy (JDP)--a strong variant of differential privacy for settings where each user receives t… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2009.09052v1-abstract-full').style.display = 'inline'; document.getElementById('2009.09052v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2009.09052v1-abstract-full" style="display: none;"> Motivated by high-stakes decision-making domains like personalized medicine where user information is inherently sensitive, we design privacy preserving exploration policies for episodic reinforcement learning (RL). We first provide a meaningful privacy formulation using the notion of joint differential privacy (JDP)--a strong variant of differential privacy for settings where each user receives their own sets of output (e.g., policy recommendations). We then develop a private optimism-based learning algorithm that simultaneously achieves strong PAC and regret bounds, and enjoys a JDP guarantee. Our algorithm only pays for a moderate privacy cost on exploration: in comparison to the non-private bounds, the privacy parameter only appears in lower-order terms. Finally, we present lower bounds on sample complexity and regret for reinforcement learning subject to JDP. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2009.09052v1-abstract-full').style.display = 'none'; document.getElementById('2009.09052v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 18 September, 2020; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> September 2020. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2007.06605">arXiv:2007.06605</a> <span> [<a href="https://arxiv.org/pdf/2007.06605">pdf</a>, <a href="https://arxiv.org/format/2007.06605">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Cryptography and Security">cs.CR</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">stat.ML</span> </div> </div> <p class="title is-5 mathjax"> Privacy Amplification via Random Check-Ins </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Balle%2C+B">Borja Balle</a>, <a href="/search/cs?searchtype=author&query=Kairouz%2C+P">Peter Kairouz</a>, <a href="/search/cs?searchtype=author&query=McMahan%2C+H+B">H. Brendan McMahan</a>, <a href="/search/cs?searchtype=author&query=Thakkar%2C+O">Om Thakkar</a>, <a href="/search/cs?searchtype=author&query=Thakurta%2C+A">Abhradeep Thakurta</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2007.06605v2-abstract-short" style="display: inline;"> Differentially Private Stochastic Gradient Descent (DP-SGD) forms a fundamental building block in many applications for learning over sensitive data. 
Two standard approaches, privacy amplification by subsampling, and privacy amplification by shuffling, permit adding lower noise in DP-SGD than via na茂ve schemes. A key assumption in both these approaches is that the elements in the data set can be u… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2007.06605v2-abstract-full').style.display = 'inline'; document.getElementById('2007.06605v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2007.06605v2-abstract-full" style="display: none;"> Differentially Private Stochastic Gradient Descent (DP-SGD) forms a fundamental building block in many applications for learning over sensitive data. Two standard approaches, privacy amplification by subsampling, and privacy amplification by shuffling, permit adding lower noise in DP-SGD than via na茂ve schemes. A key assumption in both these approaches is that the elements in the data set can be uniformly sampled, or be uniformly permuted -- constraints that may become prohibitive when the data is processed in a decentralized or distributed fashion. In this paper, we focus on conducting iterative methods like DP-SGD in the setting of federated learning (FL) wherein the data is distributed among many devices (clients). Our main contribution is the \emph{random check-in} distributed protocol, which crucially relies only on randomized participation decisions made locally and independently by each client. It has privacy/accuracy trade-offs similar to privacy amplification by subsampling/shuffling. However, our method does not require server-initiated communication, or even knowledge of the population size. To our knowledge, this is the first privacy amplification tailored for a distributed learning framework, and it may have broader applicability beyond FL. Along the way, we extend privacy amplification by shuffling to incorporate $(蔚,未)$-DP local randomizers, and exponentially improve its guarantees. In practical regimes, this improvement allows for similar privacy and utility using data from an order of magnitude fewer users. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2007.06605v2-abstract-full').style.display = 'none'; document.getElementById('2007.06605v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 30 July, 2020; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 13 July, 2020; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> July 2020. 
</p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Updated proof for $(蔚_0, 未_0)$-DP local randomizers</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2002.00817">arXiv:2002.00817</a> <span> [<a href="https://arxiv.org/pdf/2002.00817">pdf</a>, <a href="https://arxiv.org/format/2002.00817">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Cryptography and Security">cs.CR</span> </div> <div class="is-inline-block" style="margin-left: 0.5rem"> <div class="tags has-addons"> <span class="tag is-dark is-size-7">doi</span> <span class="tag is-light is-size-7"><a class="" href="https://doi.org/10.1145/3372297.3417242">10.1145/3372297.3417242 <i class="fa fa-external-link" aria-hidden="true"></i></a></span> </div> </div> </div> <p class="title is-5 mathjax"> Private Summation in the Multi-Message Shuffle Model </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Balle%2C+B">Borja Balle</a>, <a href="/search/cs?searchtype=author&query=Bell%2C+J">James Bell</a>, <a href="/search/cs?searchtype=author&query=Gascon%2C+A">Adria Gascon</a>, <a href="/search/cs?searchtype=author&query=Nissim%2C+K">Kobbi Nissim</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2002.00817v3-abstract-short" style="display: inline;"> The shuffle model of differential privacy (Erlingsson et al. SODA 2019; Cheu et al. EUROCRYPT 2019) and its close relative encode-shuffle-analyze (Bittau et al. SOSP 2017) provide a fertile middle ground between the well-known local and central models. Similarly to the local model, the shuffle model assumes an untrusted data collector who receives privatized messages from users, but in this case a… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2002.00817v3-abstract-full').style.display = 'inline'; document.getElementById('2002.00817v3-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2002.00817v3-abstract-full" style="display: none;"> The shuffle model of differential privacy (Erlingsson et al. SODA 2019; Cheu et al. EUROCRYPT 2019) and its close relative encode-shuffle-analyze (Bittau et al. SOSP 2017) provide a fertile middle ground between the well-known local and central models. Similarly to the local model, the shuffle model assumes an untrusted data collector who receives privatized messages from users, but in this case a secure shuffler is used to transmit messages from users to the collector in a way that hides which messages came from which user. An interesting feature of the shuffle model is that increasing the amount of messages sent by each user can lead to protocols with accuracies comparable to the ones achievable in the central model. In particular, for the problem of privately computing the sum of $n$ bounded real values held by $n$ different users, Cheu et al. showed that $O(\sqrt{n})$ messages per user suffice to achieve $O(1)$ error (the optimal rate in the central model), while Balle et al. 
(CRYPTO 2019) recently showed that a single message per user leads to $螛(n^{1/3})$ MSE (mean squared error), a rate strictly in-between what is achievable in the local and central models. This paper introduces two new protocols for summation in the shuffle model with improved accuracy and communication trade-offs. Our first contribution is a recursive construction based on the protocol from Balle et al. mentioned above, providing $\mathrm{poly}(\log \log n)$ error with $O(\log \log n)$ messages per user. The second contribution is a protocol with $O(1)$ error and $O(1)$ messages per user based on a novel analysis of the reduction from secure summation to shuffling introduced by Ishai et al. (FOCS 2006) (the original reduction required $O(\log n)$ messages per user). <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2002.00817v3-abstract-full').style.display = 'none'; document.getElementById('2002.00817v3-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 19 December, 2022; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 3 February, 2020; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2020. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Published at CCS'20</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/1910.08902">arXiv:1910.08902</a> <span> [<a href="https://arxiv.org/pdf/1910.08902">pdf</a>, <a href="https://arxiv.org/ps/1910.08902">ps</a>, <a href="https://arxiv.org/format/1910.08902">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Cryptography and Security">cs.CR</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">stat.ML</span> </div> </div> <p class="title is-5 mathjax"> Privacy- and Utility-Preserving Textual Analysis via Calibrated Multivariate Perturbations </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Feyisetan%2C+O">Oluwaseyi Feyisetan</a>, <a href="/search/cs?searchtype=author&query=Balle%2C+B">Borja Balle</a>, <a href="/search/cs?searchtype=author&query=Drake%2C+T">Thomas Drake</a>, <a href="/search/cs?searchtype=author&query=Diethe%2C+T">Tom Diethe</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="1910.08902v1-abstract-short" style="display: inline;"> Accurately learning from user data while providing quantifiable privacy guarantees provides an opportunity to build better ML models while maintaining user trust. This paper presents a formal approach to carrying out privacy preserving text perturbation using the notion of dx-privacy designed to achieve geo-indistinguishability in location data. 
Our approach applies carefully calibrated noise to v… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('1910.08902v1-abstract-full').style.display = 'inline'; document.getElementById('1910.08902v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="1910.08902v1-abstract-full" style="display: none;"> Accurately learning from user data while providing quantifiable privacy guarantees provides an opportunity to build better ML models while maintaining user trust. This paper presents a formal approach to carrying out privacy preserving text perturbation using the notion of dx-privacy designed to achieve geo-indistinguishability in location data. Our approach applies carefully calibrated noise to vector representation of words in a high dimension space as defined by word embedding models. We present a privacy proof that satisfies dx-privacy where the privacy parameter epsilon provides guarantees with respect to a distance metric defined by the word embedding space. We demonstrate how epsilon can be selected by analyzing plausible deniability statistics backed up by large scale analysis on GloVe and fastText embeddings. We conduct privacy audit experiments against 2 baseline models and utility experiments on 3 datasets to demonstrate the tradeoff between privacy and utility for varying values of epsilon on different task types. Our results demonstrate practical utility (< 2% utility loss for training binary classifiers) while providing better privacy guarantees than baseline models. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('1910.08902v1-abstract-full').style.display = 'none'; document.getElementById('1910.08902v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 20 October, 2019; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2019. 
</p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Accepted at WSDM 2020</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/1910.05876">arXiv:1910.05876</a> <span> [<a href="https://arxiv.org/pdf/1910.05876">pdf</a>, <a href="https://arxiv.org/format/1910.05876">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">stat.ML</span> </div> </div> <p class="title is-5 mathjax"> Actor Critic with Differentially Private Critic </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Lebensold%2C+J">Jonathan Lebensold</a>, <a href="/search/cs?searchtype=author&query=Hamilton%2C+W">William Hamilton</a>, <a href="/search/cs?searchtype=author&query=Balle%2C+B">Borja Balle</a>, <a href="/search/cs?searchtype=author&query=Precup%2C+D">Doina Precup</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="1910.05876v1-abstract-short" style="display: inline;"> Reinforcement learning algorithms are known to be sample inefficient, and often performance on one task can be substantially improved by leveraging information (e.g., via pre-training) on other related tasks. In this work, we propose a technique to achieve such knowledge transfer in cases where agent trajectories contain sensitive or private information, such as in the healthcare domain. Our appro… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('1910.05876v1-abstract-full').style.display = 'inline'; document.getElementById('1910.05876v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="1910.05876v1-abstract-full" style="display: none;"> Reinforcement learning algorithms are known to be sample inefficient, and often performance on one task can be substantially improved by leveraging information (e.g., via pre-training) on other related tasks. In this work, we propose a technique to achieve such knowledge transfer in cases where agent trajectories contain sensitive or private information, such as in the healthcare domain. Our approach leverages a differentially private policy evaluation algorithm to initialize an actor-critic model and improve the effectiveness of learning in downstream tasks. We empirically show this technique increases sample efficiency in resource-constrained control problems while preserving the privacy of trajectories collected in an upstream task. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('1910.05876v1-abstract-full').style.display = 'none'; document.getElementById('1910.05876v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 13 October, 2019; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2019. 
</p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">6 Pages, Presented at the Privacy in Machine Learning Workshop, NeurIPS 2019</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/1909.11225">arXiv:1909.11225</a> <span> [<a href="https://arxiv.org/pdf/1909.11225">pdf</a>, <a href="https://arxiv.org/ps/1909.11225">ps</a>, <a href="https://arxiv.org/format/1909.11225">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Cryptography and Security">cs.CR</span> </div> </div> <p class="title is-5 mathjax"> Improved Summation from Shuffling </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Balle%2C+B">Borja Balle</a>, <a href="/search/cs?searchtype=author&query=Bell%2C+J">James Bell</a>, <a href="/search/cs?searchtype=author&query=Gascon%2C+A">Adria Gascon</a>, <a href="/search/cs?searchtype=author&query=Nissim%2C+K">Kobbi Nissim</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="1909.11225v1-abstract-short" style="display: inline;"> A protocol by Ishai et al.\ (FOCS 2006) showing how to implement distributed $n$-party summation from secure shuffling has regained relevance in the context of the recently proposed \emph{shuffle model} of differential privacy, as it allows to attain the accuracy levels of the curator model at a moderate communication cost. To achieve statistical security $2^{-蟽}$, the protocol by Ishai et al.\ re… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('1909.11225v1-abstract-full').style.display = 'inline'; document.getElementById('1909.11225v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="1909.11225v1-abstract-full" style="display: none;"> A protocol by Ishai et al.\ (FOCS 2006) showing how to implement distributed $n$-party summation from secure shuffling has regained relevance in the context of the recently proposed \emph{shuffle model} of differential privacy, as it allows to attain the accuracy levels of the curator model at a moderate communication cost. To achieve statistical security $2^{-蟽}$, the protocol by Ishai et al.\ requires the number of messages sent by each party to {\em grow} logarithmically with $n$ as $O(\log n + 蟽)$. In this note we give an improved analysis achieving a dependency of the form $O(1+蟽/\log n)$. Conceptually, this addresses the intuitive question left open by Ishai et al.\ of whether the shuffling step in their protocol provides a "hiding in the crowd" amplification effect as $n$ increases. From a practical perspective, our analysis provides explicit constants and shows, for example, that the method of Ishai et al.\ applied to summation of $32$-bit numbers from $n=10^4$ parties sending $12$ messages each provides statistical security $2^{-40}$. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('1909.11225v1-abstract-full').style.display = 'none'; document.getElementById('1909.11225v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 24 September, 2019; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> September 2019. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/1906.09116">arXiv:1906.09116</a> <span> [<a href="https://arxiv.org/pdf/1906.09116">pdf</a>, <a href="https://arxiv.org/ps/1906.09116">ps</a>, <a href="https://arxiv.org/format/1906.09116">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Cryptography and Security">cs.CR</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">stat.ML</span> </div> </div> <p class="title is-5 mathjax"> Differentially Private Summation with Multi-Message Shuffling </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Balle%2C+B">Borja Balle</a>, <a href="/search/cs?searchtype=author&query=Bell%2C+J">James Bell</a>, <a href="/search/cs?searchtype=author&query=Gascon%2C+A">Adria Gascon</a>, <a href="/search/cs?searchtype=author&query=Nissim%2C+K">Kobbi Nissim</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="1906.09116v3-abstract-short" style="display: inline;"> In recent work, Cheu et al. (Eurocrypt 2019) proposed a protocol for $n$-party real summation in the shuffle model of differential privacy with $O_{蔚, 未}(1)$ error and $螛(蔚\sqrt{n})$ one-bit messages per party. In contrast, every local model protocol for real summation must incur error $惟(1/\sqrt{n})$, and there exist protocols matching this lower bound which require just one bit of communication… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('1906.09116v3-abstract-full').style.display = 'inline'; document.getElementById('1906.09116v3-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="1906.09116v3-abstract-full" style="display: none;"> In recent work, Cheu et al. (Eurocrypt 2019) proposed a protocol for $n$-party real summation in the shuffle model of differential privacy with $O_{蔚, 未}(1)$ error and $螛(蔚\sqrt{n})$ one-bit messages per party. In contrast, every local model protocol for real summation must incur error $惟(1/\sqrt{n})$, and there exist protocols matching this lower bound which require just one bit of communication per party. Whether this gap in number of messages is necessary was left open by Cheu et al. In this note we show a protocol with $O(1/蔚)$ error and $O(\log(n/未))$ messages of size $O(\log(n))$ per party. This protocol is based on the work of Ishai et al.\ (FOCS 2006) showing how to implement distributed summation from secure shuffling, and the observation that this allows simulating the Laplace mechanism in the shuffle model. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('1906.09116v3-abstract-full').style.display = 'none'; document.getElementById('1906.09116v3-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 21 August, 2019; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 20 June, 2019; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> June 2019. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/1905.12264">arXiv:1905.12264</a> <span> [<a href="https://arxiv.org/pdf/1905.12264">pdf</a>, <a href="https://arxiv.org/ps/1905.12264">ps</a>, <a href="https://arxiv.org/format/1905.12264">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Cryptography and Security">cs.CR</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Probability">math.PR</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">stat.ML</span> </div> </div> <p class="title is-5 mathjax"> Privacy Amplification by Mixing and Diffusion Mechanisms </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Balle%2C+B">Borja Balle</a>, <a href="/search/cs?searchtype=author&query=Barthe%2C+G">Gilles Barthe</a>, <a href="/search/cs?searchtype=author&query=Gaboardi%2C+M">Marco Gaboardi</a>, <a href="/search/cs?searchtype=author&query=Geumlek%2C+J">Joseph Geumlek</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="1905.12264v2-abstract-short" style="display: inline;"> A fundamental result in differential privacy states that the privacy guarantees of a mechanism are preserved by any post-processing of its output. In this paper we investigate under what conditions stochastic post-processing can amplify the privacy of a mechanism. By interpreting post-processing as the application of a Markov operator, we first give a series of amplification results in terms of un… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('1905.12264v2-abstract-full').style.display = 'inline'; document.getElementById('1905.12264v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="1905.12264v2-abstract-full" style="display: none;"> A fundamental result in differential privacy states that the privacy guarantees of a mechanism are preserved by any post-processing of its output. In this paper we investigate under what conditions stochastic post-processing can amplify the privacy of a mechanism. By interpreting post-processing as the application of a Markov operator, we first give a series of amplification results in terms of uniform mixing properties of the Markov process defined by said operator. Next we provide amplification bounds in terms of coupling arguments which can be applied in cases where uniform mixing is not available. 
Finally, we introduce a new family of mechanisms based on diffusion processes which are closed under post-processing, and analyze their privacy via a novel heat flow argument. On the applied side, we generalize the analysis of "privacy amplification by iteration" in Noisy SGD and show it admits an exponential improvement in the strongly convex case, and study a mechanism based on the Ornstein-Uhlenbeck diffusion process which contains the Gaussian mechanism with optimal post-processing on bounded inputs as a special case. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('1905.12264v2-abstract-full').style.display = 'none'; document.getElementById('1905.12264v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 27 October, 2019; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 29 May, 2019; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> May 2019. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/1905.11190">arXiv:1905.11190</a> <span> [<a href="https://arxiv.org/pdf/1905.11190">pdf</a>, <a href="https://arxiv.org/format/1905.11190">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Logic in Computer Science">cs.LO</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">stat.ML</span> </div> </div> <p class="title is-5 mathjax"> Model-Agnostic Counterfactual Explanations for Consequential Decisions </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Karimi%2C+A">Amir-Hossein Karimi</a>, <a href="/search/cs?searchtype=author&query=Barthe%2C+G">Gilles Barthe</a>, <a href="/search/cs?searchtype=author&query=Balle%2C+B">Borja Balle</a>, <a href="/search/cs?searchtype=author&query=Valera%2C+I">Isabel Valera</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="1905.11190v5-abstract-short" style="display: inline;"> Predictive models are being increasingly used to support consequential decision making at the individual level in contexts such as pretrial bail and loan approval. As a result, there is increasing social and legal pressure to provide explanations that help the affected individuals not only to understand why a prediction was output, but also how to act to obtain a desired outcome. To this end, seve… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('1905.11190v5-abstract-full').style.display = 'inline'; document.getElementById('1905.11190v5-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="1905.11190v5-abstract-full" style="display: none;"> Predictive models are being increasingly used to support consequential decision making at the individual level in contexts such as pretrial bail and loan approval. 
As a result, there is increasing social and legal pressure to provide explanations that help the affected individuals not only to understand why a prediction was output, but also how to act to obtain a desired outcome. To this end, several works have proposed optimization-based methods to generate nearest counterfactual explanations. However, these methods are often restricted to a particular subset of models (e.g., decision trees or linear models) and differentiable distance functions. In contrast, we build on standard theory and tools from formal verification and propose a novel algorithm that solves a sequence of satisfiability problems, where both the distance function (objective) and predictive model (constraints) are represented as logic formulae. As shown by our experiments on real-world data, our algorithm is: i) model-agnostic ({non-}linear, {non-}differentiable, {non-}convex); ii) data-type-agnostic (heterogeneous features); iii) distance-agnostic ($\ell_0, \ell_1, \ell_\infty$, and combinations thereof); iv) able to generate plausible and diverse counterfactuals for any sample (i.e., 100% coverage); and v) at provably optimal distances. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('1905.11190v5-abstract-full').style.display = 'none'; document.getElementById('1905.11190v5-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 28 February, 2020; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 27 May, 2019; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> May 2019. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/1905.10862">arXiv:1905.10862</a> <span> [<a href="https://arxiv.org/pdf/1905.10862">pdf</a>, <a href="https://arxiv.org/format/1905.10862">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">stat.ML</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> Automatic Discovery of Privacy-Utility Pareto Fronts </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Avent%2C+B">Brendan Avent</a>, <a href="/search/cs?searchtype=author&query=Gonzalez%2C+J">Javier Gonzalez</a>, <a href="/search/cs?searchtype=author&query=Diethe%2C+T">Tom Diethe</a>, <a href="/search/cs?searchtype=author&query=Paleyes%2C+A">Andrei Paleyes</a>, <a href="/search/cs?searchtype=author&query=Balle%2C+B">Borja Balle</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="1905.10862v4-abstract-short" style="display: inline;"> Differential privacy is a mathematical framework for privacy-preserving data analysis. Changing the hyperparameters of a differentially private algorithm allows one to trade off privacy and utility in a principled way. 
Quantifying this trade-off in advance is essential to decision-makers tasked with deciding how much privacy can be provided in a particular application while maintaining acceptable… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('1905.10862v4-abstract-full').style.display = 'inline'; document.getElementById('1905.10862v4-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="1905.10862v4-abstract-full" style="display: none;"> Differential privacy is a mathematical framework for privacy-preserving data analysis. Changing the hyperparameters of a differentially private algorithm allows one to trade off privacy and utility in a principled way. Quantifying this trade-off in advance is essential to decision-makers tasked with deciding how much privacy can be provided in a particular application while maintaining acceptable utility. Analytical utility guarantees offer a rigorous tool to reason about this trade-off, but are generally only available for relatively simple problems. For more complex tasks, such as training neural networks under differential privacy, the utility achieved by a given algorithm can only be measured empirically. This paper presents a Bayesian optimization methodology for efficiently characterizing the privacy--utility trade-off of any differentially private algorithm using only empirical measurements of its utility. The versatility of our method is illustrated on a number of machine learning tasks involving multiple models, optimizers, and datasets. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('1905.10862v4-abstract-full').style.display = 'none'; document.getElementById('1905.10862v4-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 21 July, 2020; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 26 May, 2019; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> May 2019. 
</p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Proceedings on Privacy Enhancing Technologies 2020</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/1905.09982">arXiv:1905.09982</a> <span> [<a href="https://arxiv.org/pdf/1905.09982">pdf</a>, <a href="https://arxiv.org/format/1905.09982">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">stat.ML</span> </div> </div> <p class="title is-5 mathjax"> Hypothesis Testing Interpretations and Renyi Differential Privacy </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Balle%2C+B">Borja Balle</a>, <a href="/search/cs?searchtype=author&query=Barthe%2C+G">Gilles Barthe</a>, <a href="/search/cs?searchtype=author&query=Gaboardi%2C+M">Marco Gaboardi</a>, <a href="/search/cs?searchtype=author&query=Hsu%2C+J">Justin Hsu</a>, <a href="/search/cs?searchtype=author&query=Sato%2C+T">Tetsuya Sato</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="1905.09982v2-abstract-short" style="display: inline;"> Differential privacy is a de facto standard in data privacy, with applications in the public and private sectors. A way to explain differential privacy, which is particularly appealing to statistician and social scientists is by means of its statistical hypothesis testing interpretation. Informally, one cannot effectively test whether a specific individual has contributed her data by observing the… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('1905.09982v2-abstract-full').style.display = 'inline'; document.getElementById('1905.09982v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="1905.09982v2-abstract-full" style="display: none;"> Differential privacy is a de facto standard in data privacy, with applications in the public and private sectors. A way to explain differential privacy, which is particularly appealing to statistician and social scientists is by means of its statistical hypothesis testing interpretation. Informally, one cannot effectively test whether a specific individual has contributed her data by observing the output of a private mechanism---any test cannot have both high significance and high power. In this paper, we identify some conditions under which a privacy definition given in terms of a statistical divergence satisfies a similar interpretation. These conditions are useful to analyze the distinguishability power of divergences and we use them to study the hypothesis testing interpretation of some relaxations of differential privacy based on Renyi divergence. This analysis also results in an improved conversion rule between these definitions and differential privacy. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('1905.09982v2-abstract-full').style.display = 'none'; document.getElementById('1905.09982v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 8 October, 2019; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 23 May, 2019; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> May 2019. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Journal ref:</span> Proceedings of the Twenty Third International Conference on Artificial Intelligence and Statistics, PMLR 108:2496-2506, 2020 </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/1903.11112">arXiv:1903.11112</a> <span> [<a href="https://arxiv.org/pdf/1903.11112">pdf</a>, <a href="https://arxiv.org/format/1903.11112">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">stat.ML</span> </div> </div> <p class="title is-5 mathjax"> Privacy-preserving Active Learning on Sensitive Data for User Intent Classification </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Feyisetan%2C+O">Oluwaseyi Feyisetan</a>, <a href="/search/cs?searchtype=author&query=Drake%2C+T">Thomas Drake</a>, <a href="/search/cs?searchtype=author&query=Balle%2C+B">Borja Balle</a>, <a href="/search/cs?searchtype=author&query=Diethe%2C+T">Tom Diethe</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="1903.11112v1-abstract-short" style="display: inline;"> Active learning holds promise of significantly reducing data annotation costs while maintaining reasonable model performance. However, it requires sending data to annotators for labeling. This presents a possible privacy leak when the training set includes sensitive user data. In this paper, we describe an approach for carrying out privacy preserving active learning with quantifiable guarantees. W… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('1903.11112v1-abstract-full').style.display = 'inline'; document.getElementById('1903.11112v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="1903.11112v1-abstract-full" style="display: none;"> Active learning holds promise of significantly reducing data annotation costs while maintaining reasonable model performance. However, it requires sending data to annotators for labeling. This presents a possible privacy leak when the training set includes sensitive user data. In this paper, we describe an approach for carrying out privacy preserving active learning with quantifiable guarantees. We evaluate our approach by showing the tradeoff between privacy, utility and annotation budget on a binary classification task in a active learning setting. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('1903.11112v1-abstract-full').style.display = 'none'; document.getElementById('1903.11112v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 26 March, 2019; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> March 2019. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">To appear at PAL: Privacy-Enhancing Artificial Intelligence and Language Technologies as part of the AAAI Spring Symposium Series (AAAI-SSS 2019)</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/1903.05202">arXiv:1903.05202</a> <span> [<a href="https://arxiv.org/pdf/1903.05202">pdf</a>, <a href="https://arxiv.org/format/1903.05202">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">stat.ML</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> Continual Learning in Practice </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Diethe%2C+T">Tom Diethe</a>, <a href="/search/cs?searchtype=author&query=Borchert%2C+T">Tom Borchert</a>, <a href="/search/cs?searchtype=author&query=Thereska%2C+E">Eno Thereska</a>, <a href="/search/cs?searchtype=author&query=Balle%2C+B">Borja Balle</a>, <a href="/search/cs?searchtype=author&query=Lawrence%2C+N">Neil Lawrence</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="1903.05202v2-abstract-short" style="display: inline;"> This paper describes a reference architecture for self-maintaining systems that can learn continually, as data arrives. In environments where data evolves, we need architectures that manage Machine Learning (ML) models in production, adapt to shifting data distributions, cope with outliers, retrain when necessary, and adapt to new tasks. This represents continual AutoML or Automatically Adaptive M… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('1903.05202v2-abstract-full').style.display = 'inline'; document.getElementById('1903.05202v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="1903.05202v2-abstract-full" style="display: none;"> This paper describes a reference architecture for self-maintaining systems that can learn continually, as data arrives. In environments where data evolves, we need architectures that manage Machine Learning (ML) models in production, adapt to shifting data distributions, cope with outliers, retrain when necessary, and adapt to new tasks. This represents continual AutoML or Automatically Adaptive Machine Learning. We describe the challenges and proposes a reference architecture. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('1903.05202v2-abstract-full').style.display = 'none'; document.getElementById('1903.05202v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 18 March, 2019; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 12 March, 2019; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> March 2019. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Presented at the NeurIPS 2018 workshop on Continual Learning https://sites.google.com/view/continual2018/home</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/1903.02837">arXiv:1903.02837</a> <span> [<a href="https://arxiv.org/pdf/1903.02837">pdf</a>, <a href="https://arxiv.org/format/1903.02837">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Cryptography and Security">cs.CR</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">stat.ML</span> </div> </div> <p class="title is-5 mathjax"> The Privacy Blanket of the Shuffle Model </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Balle%2C+B">Borja Balle</a>, <a href="/search/cs?searchtype=author&query=Bell%2C+J">James Bell</a>, <a href="/search/cs?searchtype=author&query=Gascon%2C+A">Adria Gascon</a>, <a href="/search/cs?searchtype=author&query=Nissim%2C+K">Kobbi Nissim</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="1903.02837v2-abstract-short" style="display: inline;"> This work studies differential privacy in the context of the recently proposed shuffle model. Unlike in the local model, where the server collecting privatized data from users can track back an input to a specific user, in the shuffle model users submit their privatized inputs to a server anonymously. This setup yields a trust model which sits in between the classical curator and local models for… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('1903.02837v2-abstract-full').style.display = 'inline'; document.getElementById('1903.02837v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="1903.02837v2-abstract-full" style="display: none;"> This work studies differential privacy in the context of the recently proposed shuffle model. Unlike in the local model, where the server collecting privatized data from users can track back an input to a specific user, in the shuffle model users submit their privatized inputs to a server anonymously. This setup yields a trust model which sits in between the classical curator and local models for differential privacy. The shuffle model is the core idea in the Encode, Shuffle, Analyze (ESA) model introduced by Bittau et al. (SOPS 2017). Recent work by Cheu et al. 
(EUROCRYPT 2019) analyzes the differential privacy properties of the shuffle model and shows that in some cases shuffled protocols provide strictly better accuracy than local protocols. Additionally, Erlingsson et al. (SODA 2019) provide a privacy amplification bound quantifying the level of curator differential privacy achieved by the shuffle model in terms of the local differential privacy of the randomizer used by each user. In this context, we make three contributions. First, we provide an optimal single message protocol for summation of real numbers in the shuffle model. Our protocol is very simple and has better accuracy and communication than the protocols for this same problem proposed by Cheu et al. Optimality of this protocol follows from our second contribution, a new lower bound for the accuracy of private protocols for summation of real numbers in the shuffle model. The third contribution is a new amplification bound for analyzing the privacy of protocols in the shuffle model in terms of the privacy provided by the corresponding local randomizer. Our amplification bound generalizes the results by Erlingsson et al. to a wider range of parameters, and provides a whole family of methods to analyze privacy amplification in the shuffle model. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('1903.02837v2-abstract-full').style.display = 'none'; document.getElementById('1903.02837v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 2 June, 2019; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 7 March, 2019; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> March 2019. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/1810.07468">arXiv:1810.07468</a> <span> [<a href="https://arxiv.org/pdf/1810.07468">pdf</a>, <a href="https://arxiv.org/format/1810.07468">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">stat.ML</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> Hierarchical Methods of Moments </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Ruffini%2C+M">Matteo Ruffini</a>, <a href="/search/cs?searchtype=author&query=Rabusseau%2C+G">Guillaume Rabusseau</a>, <a href="/search/cs?searchtype=author&query=Balle%2C+B">Borja Balle</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="1810.07468v1-abstract-short" style="display: inline;"> Spectral methods of moments provide a powerful tool for learning the parameters of latent variable models. Despite their theoretical appeal, the applicability of these methods to real data is still limited due to a lack of robustness to model misspecification. In this paper we present a hierarchical approach to methods of moments to circumvent such limitations. 
Our method is based on replacing the… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('1810.07468v1-abstract-full').style.display = 'inline'; document.getElementById('1810.07468v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="1810.07468v1-abstract-full" style="display: none;"> Spectral methods of moments provide a powerful tool for learning the parameters of latent variable models. Despite their theoretical appeal, the applicability of these methods to real data is still limited due to a lack of robustness to model misspecification. In this paper we present a hierarchical approach to methods of moments to circumvent such limitations. Our method is based on replacing the tensor decomposition step used in previous algorithms with approximate joint diagonalization. Experiments on topic modeling show that our method outperforms previous tensor decomposition methods in terms of speed and model quality. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('1810.07468v1-abstract-full').style.display = 'none'; document.getElementById('1810.07468v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 17 October, 2018; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2018. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">NIPS 2017</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/1808.00087">arXiv:1808.00087</a> <span> [<a href="https://arxiv.org/pdf/1808.00087">pdf</a>, <a href="https://arxiv.org/format/1808.00087">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Cryptography and Security">cs.CR</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">stat.ML</span> </div> </div> <p class="title is-5 mathjax"> Subsampled R茅nyi Differential Privacy and Analytical Moments Accountant </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Wang%2C+Y">Yu-Xiang Wang</a>, <a href="/search/cs?searchtype=author&query=Balle%2C+B">Borja Balle</a>, <a href="/search/cs?searchtype=author&query=Kasiviswanathan%2C+S">Shiva Kasiviswanathan</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="1808.00087v2-abstract-short" style="display: inline;"> We study the problem of subsampling in differential privacy (DP), a question that is the centerpiece behind many successful differentially private machine learning algorithms. 
arXiv:1808.00087 (https://arxiv.org/abs/1808.00087) [cs.LG, cs.CR, stat.ML]
Subsampled Rényi Differential Privacy and Analytical Moments Accountant
Authors: Yu-Xiang Wang, Borja Balle, Shiva Kasiviswanathan
Abstract: We study the problem of subsampling in differential privacy (DP), a question that is the centerpiece behind many successful differentially private machine learning algorithms. Specifically, we provide a tight upper bound on the Rényi Differential Privacy (RDP) (Mironov, 2017) parameters for algorithms that (1) subsample the dataset and then (2) apply a randomized mechanism M to the subsample, in terms of the RDP parameters of M and the subsampling probability parameter. Our results generalize the moments accounting technique, developed by Abadi et al. (2016) for the Gaussian mechanism, to any subsampled RDP mechanism.
Submitted 4 December, 2018; v1 submitted 31 July, 2018; originally announced August 2018.
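For intuition on the quantity being bounded, one can numerically evaluate the Rényi divergence between the output distributions of a Poisson-subsampled Gaussian mechanism on adjacent datasets, i.e., the mixture $(1-q)\,N(0,\sigma^2) + q\,N(1,\sigma^2)$ versus $N(0,\sigma^2)$. The sketch below does this by brute-force integration; it only covers one of the divergence directions an RDP bound must control, and is illustrative only, since the paper derives tight analytical bounds.

```python
# Numerical look at the Renyi divergence that governs the RDP of a
# Poisson-subsampled Gaussian mechanism with sampling rate q and noise sigma.
# D_alpha(P || Q) = log( integral p^alpha q^(1-alpha) ) / (alpha - 1).
import numpy as np
from scipy import integrate
from scipy.stats import norm

def subsampled_gaussian_rdp(q, sigma, alpha):
    p = lambda x: (1 - q) * norm.pdf(x, 0, sigma) + q * norm.pdf(x, 1, sigma)
    r = lambda x: norm.pdf(x, 0, sigma)
    integrand = lambda x: p(x) ** alpha * r(x) ** (1 - alpha)
    val, _ = integrate.quad(integrand, -30, 30)
    return np.log(val) / (alpha - 1)

for q in (1.0, 0.1, 0.01):
    # q = 1 recovers the plain Gaussian mechanism's RDP, alpha / (2 sigma^2)
    print(q, subsampled_gaussian_rdp(q, sigma=2.0, alpha=8))
```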
arXiv:1807.01647 (https://arxiv.org/abs/1807.01647) [cs.LG, cs.CR, stat.ML]
Privacy Amplification by Subsampling: Tight Analyses via Couplings and Divergences
Authors: Borja Balle, Gilles Barthe, Marco Gaboardi
Abstract: Differential privacy comes equipped with multiple analytical tools for the design of private data analyses. One important tool is the so-called "privacy amplification by subsampling" principle, which ensures that a differentially private mechanism run on a random subsample of a population provides higher privacy guarantees than when run on the entire population. Several instances of this principle have been studied for different random subsampling methods, each with an ad hoc analysis. In this paper we present a general method that recovers and improves prior analyses, yields lower bounds, and derives new instances of privacy amplification by subsampling. Our method leverages a characterization of differential privacy as a divergence which emerged in the program verification community. Furthermore, it introduces new tools, including advanced joint convexity and privacy profiles, which might be of independent interest.
Submitted 23 November, 2018; v1 submitted 4 July, 2018; originally announced July 2018.
Comments: To appear in NeurIPS 2018
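The best-known instance of this principle, which a general analysis of this kind recovers, is the classical bound: if a mechanism is $(\varepsilon, \delta)$-DP, running it on a subsample drawn with probability $q$ satisfies $(\log(1 + q(e^\varepsilon - 1)), q\delta)$-DP. A one-liner makes the amplification visible: for small $q$ the amplified $\varepsilon$ is roughly $q\varepsilon$.

```python
# Classical amplification-by-subsampling bound: an (eps, delta)-DP mechanism
# run on a subsample drawn with probability q is
# (log(1 + q*(e^eps - 1)), q*delta)-DP.
import math

def amplified_eps(eps, q):
    return math.log(1 + q * (math.exp(eps) - 1))

for q in (1.0, 0.1, 0.01):
    print(q, amplified_eps(1.0, q))    # small q gives roughly q * eps
```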
arXiv:1805.06530 (https://arxiv.org/abs/1805.06530) [cs.LG, stat.ML]
Improving the Gaussian Mechanism for Differential Privacy: Analytical Calibration and Optimal Denoising
Authors: Borja Balle, Yu-Xiang Wang
Abstract: The Gaussian mechanism is an essential building block used in a multitude of differentially private data analysis algorithms. In this paper we revisit the Gaussian mechanism and show that the original analysis has several important limitations. Our analysis reveals that the variance formula for the original mechanism is far from tight in the high privacy regime ($\varepsilon \to 0$) and that it cannot be extended to the low privacy regime ($\varepsilon \to \infty$). We address these limitations by developing an optimal Gaussian mechanism whose variance is calibrated directly using the Gaussian cumulative distribution function instead of a tail bound approximation. We also propose to equip the Gaussian mechanism with a post-processing step based on adaptive estimation techniques, leveraging the fact that the distribution of the perturbation is known. Our experiments show that analytical calibration removes at least a third of the variance of the noise compared to the classical Gaussian mechanism, and that denoising dramatically improves the accuracy of the Gaussian mechanism in the high-dimensional regime.
Submitted 7 June, 2018; v1 submitted 16 May, 2018; originally announced May 2018.
Comments: To appear at the 35th International Conference on Machine Learning (ICML), 2018
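A sketch of what CDF-based calibration looks like in practice. It relies on the exact $(\varepsilon, \delta)$ curve of the Gaussian mechanism, $\delta(\sigma) = \Phi(\frac{\Delta}{2\sigma} - \frac{\varepsilon\sigma}{\Delta}) - e^\varepsilon \Phi(-\frac{\Delta}{2\sigma} - \frac{\varepsilon\sigma}{\Delta})$, stated here from memory of this line of analysis, so treat it as an assumption; the smallest admissible $\sigma$ is then found by root finding instead of the classical tail-bound formula $\sigma = \Delta\sqrt{2\ln(1.25/\delta)}/\varepsilon$.

```python
# CDF-based calibration of the Gaussian mechanism: find the smallest sigma
# such that adding N(0, sigma^2) noise to a sensitivity-Delta query is
# (eps, delta)-DP. The exact_delta expression is assumed, not taken verbatim
# from the paper; this is a sketch of the idea.
import math
from scipy.stats import norm
from scipy.optimize import brentq

def exact_delta(sigma, eps, Delta=1.0):
    a = Delta / (2 * sigma)
    b = eps * sigma / Delta
    return norm.cdf(a - b) - math.exp(eps) * norm.cdf(-a - b)

def calibrate_sigma(eps, delta, Delta=1.0):
    # exact_delta decreases in sigma, so bisection brackets a unique root
    return brentq(lambda s: exact_delta(s, eps, Delta) - delta, 1e-8, 1e8)

print(calibrate_sigma(eps=1.0, delta=1e-5))          # CDF-calibrated sigma
print(math.sqrt(2 * math.log(1.25 / 1e-5)) / 1.0)    # classical tail-bound sigma
```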
arXiv:1711.05994 (https://arxiv.org/abs/1711.05994) [cs.FL]
DOI: 10.1017/S0960129519000094
Singular value automata and approximate minimization
Authors: Borja Balle, Prakash Panangaden, Doina Precup
Abstract: The present paper uses spectral theory of linear operators to construct approximately minimal realizations of weighted languages. Our new contributions are: (i) a new algorithm for computing the SVD of infinite Hankel matrices based on their representation in terms of weighted automata; (ii) a new canonical form for weighted automata, arising from the SVD of their corresponding Hankel matrices; and (iii) an algorithm to construct approximate minimizations of given weighted automata by truncating the canonical form. We give bounds on the quality of our approximation.
Submitted 27 May, 2019; v1 submitted 16 November, 2017; originally announced November 2017.
Journal ref: Math. Struct. Comp. Sci. 29 (2019) 1444-1478
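Since the Hankel matrix of a weighted automaton has rank equal to its minimal number of states, the decay of the singular values governs how well a smaller automaton can approximate it. The paper computes the SVD of the full infinite Hankel matrix directly from the automaton; the sketch below only takes the SVD of a small finite block of a toy 3-state WFA, as a crude illustration of why truncation works.

```python
# Crude illustration of SVD-based approximate minimization of a weighted
# finite automaton. A WFA computes f(w) = alpha^T A_{w_1} ... A_{w_k} beta,
# and its Hankel matrix H[u, v] = f(uv) has rank = number of minimal states.
# Here we SVD a finite Hankel block of a toy 3-state WFA over {a, b}.
import itertools
import numpy as np

rng = np.random.default_rng(1)
alpha = np.array([1.0, 0.0, 0.0])
beta = np.array([0.2, 0.3, 0.5])
A = {s: rng.uniform(0, 0.4, (3, 3)) for s in "ab"}

def f(word):
    v = alpha
    for s in word:
        v = v @ A[s]
    return float(v @ beta)

words = ["".join(w) for k in range(3) for w in itertools.product("ab", repeat=k)]
H = np.array([[f(u + v) for v in words] for u in words])
# At most 3 nonzero singular values; fast decay => good small approximations.
print(np.linalg.svd(H, compute_uv=False))
```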
arXiv:1702.08017 (https://arxiv.org/abs/1702.08017) [cs.FL]
Bisimulation Metrics for Weighted Automata
Authors: Borja Balle, Pascale Gourdeau, Prakash Panangaden
Abstract: We develop a new bisimulation (pseudo)metric for weighted finite automata (WFA) that generalizes Boreale's linear bisimulation relation. Our metrics are induced by seminorms on the state space of WFA. Our development is based on spectral properties of sets of linear operators. In particular, the joint spectral radius of the transition matrices of WFA plays a central role. We also study continuity properties of the bisimulation pseudometric, establish an undecidability result for computing the metric, and give a preliminary account of applications to spectral learning of weighted automata.
Submitted 14 May, 2017; v1 submitted 26 February, 2017; originally announced February 2017.
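The joint spectral radius (JSR) mentioned in the abstract is hard to compute in general (the paper itself proves an undecidability result for the metric), but elementary bounds follow from enumerating products of bounded length: over all length-$k$ products $P$ of the transition matrices, $\max \rho(P)^{1/k} \le \mathrm{JSR} \le \max \|P\|^{1/k}$. A brute-force sketch of these standard bounds:

```python
# Brute-force lower/upper bounds on the joint spectral radius of a set of
# matrices, via spectral radii and spectral norms of all length-k products.
import itertools
import numpy as np

def jsr_bounds(mats, k):
    lower, upper = 0.0, 0.0
    for combo in itertools.product(mats, repeat=k):
        P = np.linalg.multi_dot(combo) if k > 1 else combo[0]
        lower = max(lower, max(abs(np.linalg.eigvals(P))) ** (1 / k))
        upper = max(upper, np.linalg.norm(P, 2) ** (1 / k))
    return lower, upper

mats = [np.array([[0.5, 0.3], [0.0, 0.4]]),
        np.array([[0.4, 0.0], [0.2, 0.5]])]
for k in (1, 2, 4):
    print(k, jsr_bounds(mats, k))    # the bracket tightens as k grows
```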
arXiv:1610.07883 (https://arxiv.org/abs/1610.07883) [cs.LG, cs.FL]
Generalization Bounds for Weighted Automata
Authors: Borja Balle, Mehryar Mohri
Abstract: This paper studies the problem of learning weighted automata from a finite labeled training sample. We consider several general families of weighted automata, defined in terms of three different measures: the norm of an automaton's weights, the norm of the function computed by an automaton, or the norm of the corresponding Hankel matrix. We present new data-dependent generalization guarantees for learning weighted automata, expressed in terms of the Rademacher complexity of these families. We further present upper bounds on these Rademacher complexities, which reveal key new data-dependent terms related to the complexity of learning weighted automata.
Submitted 25 October, 2016; originally announced October 2016.
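For readers unfamiliar with the complexity measure used here: the empirical Rademacher complexity of a class $F$ on a sample $S$ is $\hat{R}_S(F) = E_\sigma[\sup_{f \in F} \frac{1}{m}\sum_i \sigma_i f(x_i)]$ with $\sigma_i$ uniform in $\{\pm 1\}$. The Monte Carlo sketch below estimates it for a toy finite class of threshold functions; the paper's contribution is bounding this quantity for classes of weighted automata, which is far more involved.

```python
# Monte Carlo estimate of the empirical Rademacher complexity of a toy
# finite class of threshold functions sign(x - t) on the line.
import numpy as np

rng = np.random.default_rng(0)
m = 200
X = rng.uniform(-1, 1, size=m)
F = [lambda x, t=t: np.sign(x - t) for t in np.linspace(-1, 1, 21)]

vals = np.array([f(X) for f in F])                 # |F| x m table of f(x_i)
sigmas = rng.choice([-1.0, 1.0], size=(1000, m))   # Rademacher draws
print(np.mean([np.max(vals @ s) / m for s in sigmas]))
```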
arXiv:1603.02010 (https://arxiv.org/abs/1603.02010) [cs.LG, stat.ML]
Differentially Private Policy Evaluation
Authors: Borja Balle, Maziar Gomrokchi, Doina Precup
Abstract: We present the first differentially private algorithms for reinforcement learning, which apply to the task of evaluating a fixed policy. We establish two approaches for achieving differential privacy, provide a theoretical analysis of the privacy and utility of the two algorithms, and show promising results on simple empirical examples.
Submitted 7 March, 2016; originally announced March 2016.
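The abstract does not spell out the two mechanisms, so the following is only a generic, hypothetical sketch of the simplest route to private policy evaluation: output perturbation of a Monte Carlo value estimate, assuming returns bounded in $[0, G_{\max}]$ so that changing one trajectory moves the empirical mean by at most $G_{\max}/n$.

```python
# Hypothetical output-perturbation baseline for private policy evaluation:
# perturb a Monte Carlo estimate of a fixed policy's value with Gaussian
# noise from the classical mechanism (valid for eps < 1). One trajectory
# is one user's data; returns are assumed bounded in [0, Gmax].
import math
import random

def private_value_estimate(returns, Gmax, eps, delta):
    n = len(returns)
    sens = Gmax / n            # replacing one trajectory moves the mean <= Gmax/n
    sigma = sens * math.sqrt(2 * math.log(1.25 / delta)) / eps
    return sum(returns) / n + random.gauss(0.0, sigma)

returns = [random.uniform(0, 10) for _ in range(5000)]
print(private_value_estimate(returns, Gmax=10.0, eps=0.5, delta=1e-5))
```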
arXiv:1511.01442 (https://arxiv.org/abs/1511.01442) [cs.LG, cs.FL]
Low-Rank Approximation of Weighted Tree Automata
Authors: Guillaume Rabusseau, Borja Balle, Shay B. Cohen
Abstract: We describe a technique to minimize weighted tree automata (WTA), a powerful formalism that subsumes probabilistic context-free grammars (PCFGs) and latent-variable PCFGs. Our method relies on a singular value decomposition of the underlying Hankel matrix defined by the WTA. Our main theoretical result is an efficient algorithm for computing the SVD of an infinite Hankel matrix implicitly represented as a WTA. We provide an analysis of the approximation error induced by the minimization, and we evaluate our method on real-world data originating in a newswire treebank. We show that the model achieves lower perplexity than previous methods for PCFG minimization, and is also much more stable due to the absence of local optima.
Submitted 24 December, 2015; v1 submitted 4 November, 2015; originally announced November 2015.
Comments: To appear in AISTATS 2016
arXiv:1504.06840 (https://arxiv.org/abs/1504.06840) [math.PR, cs.DM, math.CO]
Diameter and Stationary Distribution of Random $r$-out Digraphs
Authors: Louigi Addario-Berry, Borja Balle, Guillem Perarnau
Abstract: Let $D(n,r)$ be a random $r$-out regular directed multigraph on the set of vertices $\{1,\ldots,n\}$. In this work, we establish that for every $r \ge 2$ there exists $\eta_r>0$ such that $\text{diam}(D(n,r))=(1+\eta_r+o(1))\log_r{n}$. Our techniques also allow us to bound some extremal quantities related to the stationary distribution of a simple random walk on $D(n,r)$. In particular, we determine the asymptotic behaviour of $\pi_{\max}$ and $\pi_{\min}$, the maximum and the minimum values of the stationary distribution. We show that with high probability $\pi_{\max} = n^{-1+o(1)}$ and $\pi_{\min}=n^{-(1+\eta_r)+o(1)}$. Our proof shows that the vertices with $\pi(v)$ close to $\pi_{\min}$ lie at the top of "narrow, slippery towers"; such vertices are also responsible for increasing the diameter from $(1+o(1))\log_r n$ to $(1+\eta_r+o(1))\log_r{n}$.
Submitted 26 April, 2015; originally announced April 2015.
Comments: 31 pages
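The diameter statement is easy to probe empirically. The sketch below samples a random $r$-out digraph (each vertex independently picks $r$ out-neighbours uniformly with replacement, an assumption about the model's sampling details) and compares its diameter to $\log_r n$; the $\eta_r$ correction only emerges asymptotically, so small-$n$ agreement is rough.

```python
# Empirical probe of the diameter result: sample D(n, r) and compare
# diam(D(n, r)) against log_r(n) via BFS from every vertex.
import math
import random
from collections import deque

def r_out_digraph(n, r, seed=0):
    rnd = random.Random(seed)
    return [[rnd.randrange(n) for _ in range(r)] for _ in range(n)]

def diameter(adj):
    best = 0
    for s in range(len(adj)):
        dist = {s: 0}
        queue = deque([s])
        while queue:
            u = queue.popleft()
            for v in adj[u]:
                if v not in dist:
                    dist[v] = dist[u] + 1
                    queue.append(v)
        if len(dist) < len(adj):
            return math.inf          # not strongly connected
        best = max(best, max(dist.values()))
    return best

n, r = 2000, 2
print(diameter(r_out_digraph(n, r)), math.log(n, r))
```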
arXiv:1501.06841 (https://arxiv.org/abs/1501.06841) [cs.FL]
A Canonical Form for Weighted Automata and Applications to Approximate Minimization
Authors: Borja Balle, Prakash Panangaden, Doina Precup
Abstract: We study the problem of constructing approximations to a weighted automaton. Weighted finite automata (WFA) are closely related to the theory of rational series. A rational series is a function from strings to real numbers that can be computed by a finite WFA. Among others, this includes probability distributions generated by hidden Markov models and probabilistic automata. The relationship between rational series and WFA is analogous to the relationship between regular languages and ordinary automata. Associated with such rational series are infinite matrices called Hankel matrices, which play a fundamental role in the theory of minimal WFA. Our contributions are: (1) an effective procedure for computing the singular value decomposition (SVD) of such infinite Hankel matrices based on their representation in terms of finite WFA; (2) a new canonical form for finite WFA based on this SVD; and (3) an algorithm to construct approximate minimizations of a given WFA. The goal of our approximate minimization algorithm is to start from a minimal WFA and produce a smaller WFA that is close to the given one in a certain sense. The desired size of the approximating automaton is given as input. We give bounds describing how well the approximation emulates the behavior of the original WFA.
Submitted 24 April, 2015; v1 submitted 27 January, 2015; originally announced January 2015.

Next page (results 51-52): /search/?searchtype=author&query=Balle%2C+B&start=50