Search | arXiv e-print repository
<a href="https://info.arxiv.org/about/donate.html">Donate</a></span></p></div> </div> <!-- contains arXiv identity and search bar --> <div class="identity level is-marginless"> <div class="level-left"> <div class="level-item"> <a class="arxiv" href="https://arxiv.org/" aria-label="arxiv-logo"> <img src="https://static.arxiv.org/static/base/1.0.0a5/images/arxiv-logo-one-color-white.svg" aria-label="logo" alt="arxiv logo" width="85" style="width:85px;"/> </a> </div> </div> <div class="search-block level-right"> <form class="level-item mini-search" method="GET" action="https://arxiv.org/search"> <div class="field has-addons"> <div class="control"> <input class="input is-small" type="text" name="query" placeholder="Search..." aria-label="Search term or terms" /> <p class="help"><a href="https://info.arxiv.org/help">Help</a> | <a href="https://arxiv.org/search/advanced">Advanced Search</a></p> </div> <div class="control"> <div class="select is-small"> <select name="searchtype" aria-label="Field to search"> <option value="all" selected="selected">All fields</option> <option value="title">Title</option> <option value="author">Author</option> <option value="abstract">Abstract</option> <option value="comments">Comments</option> <option value="journal_ref">Journal reference</option> <option value="acm_class">ACM classification</option> <option value="msc_class">MSC classification</option> <option value="report_num">Report number</option> <option value="paper_id">arXiv identifier</option> <option value="doi">DOI</option> <option value="orcid">ORCID</option> <option value="author_id">arXiv author ID</option> <option value="help">Help pages</option> <option value="full_text">Full text</option> </select> </div> </div> <input type="hidden" name="source" value="header"> <button class="button is-small is-cul-darker">Search</button> </div> </form> </div> </div> <!-- closes identity --> <div class="container"> <div class="user-tools is-size-7 has-text-right has-text-weight-bold" role="navigation" aria-label="User menu"> <a href="https://arxiv.org/login">Login</a> </div> </div> </header> <main class="container" id="main-container"> <div class="level is-marginless"> <div class="level-left"> <h1 class="title is-clearfix"> Showing 1&ndash;50 of 179 results for author: <span class="mathjax">Montanari, A</span> </h1> </div> <div class="level-right is-hidden-mobile"> <!-- feedback for mobile is moved to footer --> <span class="help" style="display: inline-block;"><a href="https://github.com/arXiv/arxiv-search/releases">Search v0.5.6 released 2020-02-24</a>&nbsp;&nbsp;</span> </div> </div> <div class="content"> <form method="GET" action="/search/cs" aria-role="search"> Searching in archive <strong>cs</strong>. <a href="/search/?searchtype=author&amp;query=Montanari%2C+A">Search in all archives.</a> <div class="field has-addons-tablet"> <div class="control is-expanded"> <label for="query" class="hidden-label">Search term or terms</label> <input class="input is-medium" id="query" name="query" placeholder="Search term..." 
type="text" value="Montanari, A"> </div> <div class="select control is-medium"> <label class="is-hidden" for="searchtype">Field</label> <select class="is-medium" id="searchtype" name="searchtype"><option value="all">All fields</option><option value="title">Title</option><option selected value="author">Author(s)</option><option value="abstract">Abstract</option><option value="comments">Comments</option><option value="journal_ref">Journal reference</option><option value="acm_class">ACM classification</option><option value="msc_class">MSC classification</option><option value="report_num">Report number</option><option value="paper_id">arXiv identifier</option><option value="doi">DOI</option><option value="orcid">ORCID</option><option value="license">License (URI)</option><option value="author_id">arXiv author ID</option><option value="help">Help pages</option><option value="full_text">Full text</option></select> </div> <div class="control"> <button class="button is-link is-medium">Search</button> </div> </div> <div class="field"> <div class="control is-size-7"> <label class="radio"> <input checked id="abstracts-0" name="abstracts" type="radio" value="show"> Show abstracts </label> <label class="radio"> <input id="abstracts-1" name="abstracts" type="radio" value="hide"> Hide abstracts </label> </div> </div> <div class="is-clearfix" style="height: 2.5em"> <div class="is-pulled-right"> <a href="/search/advanced?terms-0-term=Montanari%2C+A&amp;terms-0-field=author&amp;size=50&amp;order=-announced_date_first">Advanced Search</a> </div> </div> <input type="hidden" name="order" value="-announced_date_first"> <input type="hidden" name="size" value="50"> </form> <div class="level breathe-horizontal"> <div class="level-left"> <form method="GET" action="/search/"> <div style="display: none;"> <select id="searchtype" name="searchtype"><option value="all">All fields</option><option value="title">Title</option><option selected value="author">Author(s)</option><option value="abstract">Abstract</option><option value="comments">Comments</option><option value="journal_ref">Journal reference</option><option value="acm_class">ACM classification</option><option value="msc_class">MSC classification</option><option value="report_num">Report number</option><option value="paper_id">arXiv identifier</option><option value="doi">DOI</option><option value="orcid">ORCID</option><option value="license">License (URI)</option><option value="author_id">arXiv author ID</option><option value="help">Help pages</option><option value="full_text">Full text</option></select> <input id="query" name="query" type="text" value="Montanari, A"> <ul id="abstracts"><li><input checked id="abstracts-0" name="abstracts" type="radio" value="show"> <label for="abstracts-0">Show abstracts</label></li><li><input id="abstracts-1" name="abstracts" type="radio" value="hide"> <label for="abstracts-1">Hide abstracts</label></li></ul> </div> <div class="box field is-grouped is-grouped-multiline level-item"> <div class="control"> <span class="select is-small"> <select id="size" name="size"><option value="25">25</option><option selected value="50">50</option><option value="100">100</option><option value="200">200</option></select> </span> <label for="size">results per page</label>. 
</div> <div class="control"> <label for="order">Sort results by</label> <span class="select is-small"> <select id="order" name="order"><option selected value="-announced_date_first">Announcement date (newest first)</option><option value="announced_date_first">Announcement date (oldest first)</option><option value="-submitted_date">Submission date (newest first)</option><option value="submitted_date">Submission date (oldest first)</option><option value="">Relevance</option></select> </span> </div> <div class="control"> <button class="button is-small is-link">Go</button> </div> </div> </form> </div> </div> <nav class="pagination is-small is-centered breathe-horizontal" role="navigation" aria-label="pagination"> <a href="" class="pagination-previous is-invisible">Previous </a> <a href="/search/?searchtype=author&amp;query=Montanari%2C+A&amp;start=50" class="pagination-next" >Next </a> <ul class="pagination-list"> <li> <a href="/search/?searchtype=author&amp;query=Montanari%2C+A&amp;start=0" class="pagination-link is-current" aria-label="Goto page 1">1 </a> </li> <li> <a href="/search/?searchtype=author&amp;query=Montanari%2C+A&amp;start=50" class="pagination-link " aria-label="Page 2" aria-current="page">2 </a> </li> <li> <a href="/search/?searchtype=author&amp;query=Montanari%2C+A&amp;start=100" class="pagination-link " aria-label="Page 3" aria-current="page">3 </a> </li> <li> <a href="/search/?searchtype=author&amp;query=Montanari%2C+A&amp;start=150" class="pagination-link " aria-label="Page 4" aria-current="page">4 </a> </li> </ul> </nav> <ol class="breathe-horizontal" start="1"> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2503.08028">arXiv:2503.08028</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2503.08028">pdf</a>, <a href="https://arxiv.org/ps/2503.08028">ps</a>, <a href="https://arxiv.org/format/2503.08028">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">stat.ML</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> Computational bottlenecks for denoising diffusions </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Montanari%2C+A">Andrea Montanari</a>, <a href="/search/cs?searchtype=author&amp;query=Vu%2C+V">Viet Vu</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2503.08028v1-abstract-short" style="display: inline;"> Denoising diffusions provide a general strategy to sample from a probability distribution $渭$ in $\mathbb{R}^d$ by constructing a stochastic process $(\hat{\boldsymbol x}_t:t\ge 0)$ in ${\mathbb R}^d$ such that the distribution of $\hat{\boldsymbol x}_T$ at large times $T$ approximates $渭$. 
The drift ${\boldsymbol m}:{\mathbb R}^d\times{\mathbb R}\to{\mathbb R}^d$ of this diffusion process is lear&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2503.08028v1-abstract-full').style.display = 'inline'; document.getElementById('2503.08028v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2503.08028v1-abstract-full" style="display: none;"> Denoising diffusions provide a general strategy to sample from a probability distribution $渭$ in $\mathbb{R}^d$ by constructing a stochastic process $(\hat{\boldsymbol x}_t:t\ge 0)$ in ${\mathbb R}^d$ such that the distribution of $\hat{\boldsymbol x}_T$ at large times $T$ approximates $渭$. The drift ${\boldsymbol m}:{\mathbb R}^d\times{\mathbb R}\to{\mathbb R}^d$ of this diffusion process is learned from data (samples from $渭$) by minimizing the so-called score-matching objective. In order for the generating process to be efficient, it must be possible to evaluate (an approximation of) ${\boldsymbol m}({\boldsymbol y},t)$ in polynomial time. Is every probability distribution $渭$, for which sampling is tractable, also amenable to sampling via diffusions? We provide evidence to the contrary by constructing a probability distribution $渭$ for which sampling is easy, but the drift of the diffusion process is intractable -- under a popular conjecture on information-computation gaps in statistical estimation. We further show that any polynomial-time computable drift can be modified in a way that changes minimally the score matching objective and yet results in incorrect sampling. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2503.08028v1-abstract-full').style.display = 'none'; document.getElementById('2503.08028v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 11 March, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> March 2025. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">36 pages</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2503.05993">arXiv:2503.05993</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2503.05993">pdf</a>, <a href="https://arxiv.org/format/2503.05993">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Dynamical Systems">math.DS</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computational Physics">physics.comp-ph</span> </div> </div> <p class="title is-5 mathjax"> SODAs: Sparse Optimization for the Discovery of Differential and Algebraic Equations </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Jayadharan%2C+M">Manu Jayadharan</a>, <a href="/search/cs?searchtype=author&amp;query=Catlett%2C+C">Christina Catlett</a>, <a href="/search/cs?searchtype=author&amp;query=Montanari%2C+A+N">Arthur N. Montanari</a>, <a href="/search/cs?searchtype=author&amp;query=Mangan%2C+N+M">Niall M. 
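
The generative mechanism this abstract refers to can be made concrete in a few lines. The sketch below is illustrative only: it simulates a generic diffusion $dX_t = m(X_t, t)\,dt + \sqrt{2}\,dB_t$ by Euler-Maruyama, with a hand-chosen Ornstein-Uhlenbeck drift standing in for a score-matching estimate; the Gaussian target and the drift are assumptions for demonstration, and the paper's hardness construction is not reproduced.

```python
# Illustrative sketch only: simulate a generative diffusion
#   dX_t = m(X_t, t) dt + sqrt(2) dB_t
# by Euler-Maruyama, with the drift m standing in for a score-matching
# estimate. The Gaussian target and the exact drift below are assumptions
# for demonstration; the paper's hardness construction is not reproduced.
import numpy as np

def sample_diffusion(m, d, T=5.0, n_steps=1000, seed=None):
    """Return X_T for the diffusion started at X_0 ~ N(0, I_d)."""
    rng = np.random.default_rng(seed)
    dt = T / n_steps
    x = rng.standard_normal(d)
    for k in range(n_steps):
        t = k * dt
        x = x + m(x, t) * dt + np.sqrt(2 * dt) * rng.standard_normal(d)
    return x

# For a N(mu0, I_d) target, the drift m(x, t) = mu0 - x yields an
# Ornstein-Uhlenbeck process whose law converges to the target as T grows.
mu0 = np.ones(4)
print(sample_diffusion(lambda x, t: mu0 - x, d=4, seed=0))
```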

2. arXiv:2503.05993 [pdf, other] (math.DS, cs.LG, physics.comp-ph)
SODAs: Sparse Optimization for the Discovery of Differential and Algebraic Equations
Authors: Manu Jayadharan, Christina Catlett, Arthur N. Montanari, Niall M. Mangan
Abstract: Differential-algebraic equations (DAEs) integrate ordinary differential equations (ODEs) with algebraic constraints, providing a fundamental framework for developing models of dynamical systems characterized by timescale separation, conservation laws, and physical constraints. While sparse optimization has revolutionized model development by allowing data-driven discovery of parsimonious models from a library of possible equations, existing approaches for dynamical systems assume DAEs can be reduced to ODEs by eliminating variables before model discovery. This assumption limits the applicability of such methods to DAE systems with unknown constraints and time scales. We introduce Sparse Optimization for Differential-Algebraic Systems (SODAs), a data-driven method for the identification of DAEs in their explicit form. By discovering the algebraic and dynamic components sequentially without prior identification of the algebraic variables, this approach leads to a sequence of convex optimization problems and has the advantage of discovering interpretable models that preserve the structure of the underlying physical system. To this end, SODAs improves numerical stability when handling high correlations between library terms -- caused by near-perfect algebraic relationships -- by iteratively refining the conditioning of the candidate library. We demonstrate the performance of our method on biological, mechanical, and electrical systems, showcasing its robustness to noise in both simulated time series and real-time experimental data.
Submitted 7 March, 2025; originally announced March 2025.
Comments: 23 pages, 5 figures, Supplementary attached: 6 pages.
MSC Class: 37M10; 37N99; 68T05. ACM Class: I.2.6; G.1.7.

3. arXiv:2502.21269 [pdf, other] (stat.ML, cond-mat.dis-nn, cs.LG)
Dynamical Decoupling of Generalization and Overfitting in Large Two-Layer Networks
Authors: Andrea Montanari, Pierfrancesco Urbani
Abstract: The inductive bias and generalization properties of large machine learning models are -- to a substantial extent -- a byproduct of the optimization algorithm used for training. Among others, the scale of the random initialization, the learning rate, and early stopping all have crucial impact on the quality of the model learnt by stochastic gradient descent or related algorithms. In order to understand these phenomena, we study the training dynamics of large two-layer neural networks. We use a well-established technique from non-equilibrium statistical physics (dynamical mean field theory) to obtain an asymptotic high-dimensional characterization of this dynamics. This characterization applies to a Gaussian approximation of the hidden neurons' non-linearity, and empirically captures well the behavior of actual neural network models. Our analysis uncovers several interesting new phenomena in the training dynamics: $(i)$ The emergence of a slow time scale associated with the growth in Gaussian/Rademacher complexity; $(ii)$ As a consequence, algorithmic inductive bias towards small complexity, but only if the initialization has small enough complexity; $(iii)$ A separation of time scales between feature learning and overfitting; $(iv)$ A non-monotone behavior of the test error and, correspondingly, a 'feature unlearning' phase at large times.
Submitted 28 February, 2025; originally announced February 2025.
Comments: 89 pages; 62 pdf figures.
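
As a point of reference for the object under study (not for the paper's mean field analysis), here is a minimal two-layer network trained by one-pass SGD on synthetic data, printing train and test error along the trajectory. The width, step size, and data model are arbitrary assumptions for illustration.

```python
# Illustrative only: a small two-layer network f(x) = a . tanh(W x) trained by
# one-pass SGD on synthetic data, printing train/test error along the way.
# Width, step size, and the data model are arbitrary assumptions; the paper's
# dynamical mean field theory analysis is not reproduced here.
import numpy as np

rng = np.random.default_rng(0)
d, m, n = 30, 200, 300                        # input dim, hidden width, samples
Xtr, Xte = rng.standard_normal((n, d)), rng.standard_normal((1000, d))
w_star = rng.standard_normal(d) / np.sqrt(d)
ytr, yte = np.tanh(Xtr @ w_star), np.tanh(Xte @ w_star)

W = 0.1 * rng.standard_normal((m, d))         # small random initialization
a = 0.1 * rng.standard_normal(m)
lr = 0.05

def mse(X, y):
    return np.mean((np.tanh(X @ W.T) @ a - y) ** 2)

for step in range(5001):
    i = rng.integers(n)
    h = np.tanh(W @ Xtr[i])
    err = a @ h - ytr[i]                            # residual on one example
    g_a = err * h                                   # gradient w.r.t. 2nd layer
    g_W = err * np.outer(a * (1 - h ** 2), Xtr[i])  # gradient w.r.t. 1st layer
    a -= lr * g_a
    W -= lr * g_W
    if step % 1000 == 0:
        print(step, mse(Xtr, ytr), mse(Xte, yte))
```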

4. arXiv:2502.01953 [pdf, other] (stat.ML, cs.LG, math.ST)
Local minima of the empirical risk in high dimension: General theorems and convex examples
Authors: Kiana Asgari, Andrea Montanari, Basil Saeed
Abstract: We consider a general model for high-dimensional empirical risk minimization whereby the data $\mathbf{x}_i$ are $d$-dimensional isotropic Gaussian vectors, the model is parametrized by $\mathbf{\Theta}\in\mathbb{R}^{d\times k}$, and the loss depends on the data via the projection $\mathbf{\Theta}^\mathsf{T}\mathbf{x}_i$. This setting covers as special cases classical statistics methods (e.g. multinomial regression and other generalized linear models), but also two-layer fully connected neural networks with $k$ hidden neurons. We use the Kac-Rice formula from Gaussian process theory to derive a bound on the expected number of local minima of this empirical risk, under the proportional asymptotics in which $n,d\to\infty$, with $n\asymp d$. Via Markov's inequality, this bound allows us to determine the positions of these minimizers (with exponential deviation bounds) and hence derive sharp asymptotics on the estimation and prediction error. In this paper, we apply our characterization to convex losses, where high-dimensional asymptotics were not (in general) rigorously established for $k\ge 2$. We show that our approach is tight and allows us to prove previously conjectured results. In addition, we characterize the spectrum of the Hessian at the minimizer. A companion paper applies our general result to non-convex examples.
Submitted 3 February, 2025; originally announced February 2025.
Comments: 95 pages, 5 figures.
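
For orientation, the Kac-Rice formula invoked here counts zeros of the gradient field. In its standard textbook form (stated under the usual smoothness and non-degeneracy conditions; this is the classical identity, not the paper's refined bound):
$$\mathbb{E}\,\#\big\{\boldsymbol{\Theta} : \nabla \widehat{R}_n(\boldsymbol{\Theta}) = 0 \big\} = \int \mathbb{E}\Big[\big|\det \nabla^2 \widehat{R}_n(\boldsymbol{\Theta})\big| \,\Big|\, \nabla \widehat{R}_n(\boldsymbol{\Theta}) = 0 \Big]\, p_{\nabla \widehat{R}_n(\boldsymbol{\Theta})}(0)\, d\boldsymbol{\Theta},$$
where $\widehat{R}_n$ denotes the empirical risk and $p_{\nabla \widehat{R}_n(\boldsymbol{\Theta})}$ the density of its gradient; restricting the expectation to positive-definite Hessians counts local minima rather than all critical points.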
</p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">95 pages, 5 figures</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2410.22757">arXiv:2410.22757</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2410.22757">pdf</a>, <a href="https://arxiv.org/format/2410.22757">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Formal Languages and Automata Theory">cs.FL</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computational Complexity">cs.CC</span> </div> <div class="is-inline-block" style="margin-left: 0.5rem"> <div class="tags has-addons"> <span class="tag is-dark is-size-7">doi</span> <span class="tag is-light is-size-7"><a class="" href="https://doi.org/10.4204/EPTCS.409.5">10.4204/EPTCS.409.5 <i class="fa fa-external-link" aria-hidden="true"></i></a></span> </div> </div> </div> <p class="title is-5 mathjax"> Synthesis of Timeline-Based Planning Strategies Avoiding Determinization </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Acampora%2C+R">Renato Acampora</a>, <a href="/search/cs?searchtype=author&amp;query=Della+Monica%2C+D">Dario Della Monica</a>, <a href="/search/cs?searchtype=author&amp;query=Geatti%2C+L">Luca Geatti</a>, <a href="/search/cs?searchtype=author&amp;query=Gigante%2C+N">Nicola Gigante</a>, <a href="/search/cs?searchtype=author&amp;query=Montanari%2C+A">Angelo Montanari</a>, <a href="/search/cs?searchtype=author&amp;query=Sala%2C+P">Pietro Sala</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2410.22757v1-abstract-short" style="display: inline;"> Qualitative timeline-based planning models domains as sets of independent, but interacting, components whose behaviors over time, the timelines, are governed by sets of qualitative temporal constraints (ordering relations), called synchronization rules. Its plan-existence problem has been shown to be PSPACE-complete; in particular, PSPACE-membership has been proved via reduction to the nonemptines&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.22757v1-abstract-full').style.display = 'inline'; document.getElementById('2410.22757v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2410.22757v1-abstract-full" style="display: none;"> Qualitative timeline-based planning models domains as sets of independent, but interacting, components whose behaviors over time, the timelines, are governed by sets of qualitative temporal constraints (ordering relations), called synchronization rules. Its plan-existence problem has been shown to be PSPACE-complete; in particular, PSPACE-membership has been proved via reduction to the nonemptiness problem for nondeterministic finite automata. However, nondeterministic automata cannot be directly used to synthesize planning strategies as a costly determinization step is needed. 
In this paper, we identify a large fragment of qualitative timeline-based planning whose plan-existence problem can be directly mapped into the nonemptiness problem of deterministic finite automata, which can then be exploited to synthesize strategies. In addition, we identify a maximal subset of Allen&#39;s relations that fits into such a deterministic fragment. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.22757v1-abstract-full').style.display = 'none'; document.getElementById('2410.22757v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 30 October, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">In Proceedings GandALF 2024, arXiv:2410.21884</span> </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Journal ref:</span> EPTCS 409, 2024, pp. 5-18 </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2410.04775">arXiv:2410.04775</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2410.04775">pdf</a>, <a href="https://arxiv.org/format/2410.04775">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Emerging Technologies">cs.ET</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> OmniBuds: A Sensory Earable Platform for Advanced Bio-Sensing and On-Device Machine Learning </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Montanari%2C+A">Alessandro Montanari</a>, <a href="/search/cs?searchtype=author&amp;query=Thangarajan%2C+A">Ashok Thangarajan</a>, <a href="/search/cs?searchtype=author&amp;query=Al-Naimi%2C+K">Khaldoon Al-Naimi</a>, <a href="/search/cs?searchtype=author&amp;query=Ferlini%2C+A">Andrea Ferlini</a>, <a href="/search/cs?searchtype=author&amp;query=Liu%2C+Y">Yang Liu</a>, <a href="/search/cs?searchtype=author&amp;query=Balaji%2C+A+N">Ananta Narayanan Balaji</a>, <a href="/search/cs?searchtype=author&amp;query=Kawsar%2C+F">Fahim Kawsar</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2410.04775v1-abstract-short" style="display: inline;"> Sensory earables have evolved from basic audio enhancement devices into sophisticated platforms for clinical-grade health monitoring and wellbeing management. This paper introduces OmniBuds, an advanced sensory earable platform integrating multiple biosensors and onboard computation powered by a machine learning accelerator, all within a real-time operating system (RTOS). 

6. arXiv:2410.04775 [pdf, other] (cs.ET, cs.LG)
OmniBuds: A Sensory Earable Platform for Advanced Bio-Sensing and On-Device Machine Learning
Authors: Alessandro Montanari, Ashok Thangarajan, Khaldoon Al-Naimi, Andrea Ferlini, Yang Liu, Ananta Narayanan Balaji, Fahim Kawsar
Abstract: Sensory earables have evolved from basic audio enhancement devices into sophisticated platforms for clinical-grade health monitoring and wellbeing management. This paper introduces OmniBuds, an advanced sensory earable platform integrating multiple biosensors and onboard computation powered by a machine learning accelerator, all within a real-time operating system (RTOS). The platform's dual-ear symmetric design, equipped with precisely positioned kinetic, acoustic, optical, and thermal sensors, enables highly accurate and real-time physiological assessments. Unlike conventional earables that rely on external data processing, OmniBuds leverage real-time onboard computation to significantly enhance system efficiency, reduce latency, and safeguard privacy by processing data locally. This capability includes executing complex machine learning models directly on the device. We provide a comprehensive analysis of OmniBuds' design and hardware and software architecture, demonstrating its capacity for multi-functional applications, accurate and robust tracking of physiological parameters, and advanced human-computer interaction.
Submitted 7 October, 2024; originally announced October 2024.

7. arXiv:2410.01093 [pdf, ps, other] (math.ST, cs.LG, stat.ML)
High-dimensional logistic regression with missing data: Imputation, regularization, and universality
Authors: Kabir Aladin Verchand, Andrea Montanari
Abstract: We study high-dimensional, ridge-regularized logistic regression in a setting in which the covariates may be missing or corrupted by additive noise. When both the covariates and the additive corruptions are independent and normally distributed, we provide exact characterizations of both the prediction error and the estimation error. Moreover, we show that these characterizations are universal: as long as the entries of the data matrix satisfy a set of independence and moment conditions, our guarantees continue to hold. Universality, in turn, enables the detailed study of several imputation-based strategies when the covariates are missing completely at random. We ground our study by comparing the performance of these strategies with the conjectured performance -- stemming from replica theory in statistical physics -- of the Bayes optimal procedure. Our analysis yields several insights including: (i) a distinction between single imputation and a simple variant of multiple imputation and (ii) that adding a simple ridge regularization term to single-imputed logistic regression can yield an estimator whose prediction error is nearly indistinguishable from the Bayes optimal prediction error. We supplement our findings with extensive numerical experiments.
Submitted 1 October, 2024; originally announced October 2024.
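
The single-imputation-plus-ridge pipeline the abstract discusses is easy to state in code. The sketch below is a minimal illustration: dimensions, missingness rate, and ridge strength are assumptions for demonstration, not the paper's settings or its exact characterizations.

```python
# Minimal sketch of the pipeline the abstract studies: covariates missing
# completely at random, single (mean) imputation, then ridge-regularized
# logistic regression. Dimensions, missingness rate, and ridge strength are
# illustrative assumptions, not the paper's settings.
import numpy as np
from sklearn.linear_model import LogisticRegression

rng = np.random.default_rng(0)
n, d = 2000, 200
theta = rng.standard_normal(d) / np.sqrt(d)
X = rng.standard_normal((n, d))
y = (rng.random(n) < 1 / (1 + np.exp(-3 * (X @ theta)))).astype(int)

mask = rng.random((n, d)) < 0.3      # 30% of entries missing at random
X_imp = np.where(mask, 0.0, X)       # single imputation by the (zero) mean

# C is the inverse ridge strength; sklearn's LogisticRegression applies an
# L2 penalty by default.
clf = LogisticRegression(C=1.0, max_iter=1000).fit(X_imp, y)
print("train accuracy:", clf.score(X_imp, y))
```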

8. arXiv:2407.17954 [pdf, other] (cs.CV, cs.IT, cs.LG)
Scaling Training Data with Lossy Image Compression
Authors: Katherine L. Mentzer, Andrea Montanari
Abstract: Empirically-determined scaling laws have been broadly successful in predicting the evolution of large machine learning models with training data and number of parameters. As a consequence, they have been useful for optimizing the allocation of limited resources, most notably compute time. In certain applications, storage space is an important constraint, and data format needs to be chosen carefully as a consequence. Computer vision is a prominent example: images are inherently analog, but are always stored in a digital format using a finite number of bits. Given a dataset of digital images, the number of bits $L$ to store each of them can be further reduced using lossy data compression. This, however, can degrade the quality of the model trained on such images, since each example has lower resolution. In order to capture this trade-off and optimize storage of training data, we propose a 'storage scaling law' that describes the joint evolution of test error with sample size and number of bits per image. We prove that this law holds within a stylized model for image compression, and verify it empirically on two computer vision tasks, extracting the relevant parameters. We then show that this law can be used to optimize the lossy compression level. At given storage, models trained on optimally compressed images present a significantly smaller test error with respect to models trained on the original data. Finally, we investigate the potential benefits of randomizing the compression level.
Submitted 25 July, 2024; originally announced July 2024.
Comments: 21 pages, 27 figures.

9. arXiv:2406.02970 [pdf, ps, other] (math.PR, cs.LG, math.OC)
Which exceptional low-dimensional projections of a Gaussian point cloud can be found in polynomial time?
Authors: Andrea Montanari, Kangjie Zhou
Abstract: Given $d$-dimensional standard Gaussian vectors $\boldsymbol{x}_1,\dots,\boldsymbol{x}_n$, we consider the set of all empirical distributions of their $m$-dimensional projections, for $m$ a fixed constant. Diaconis and Freedman (1984) proved that, if $n/d\to\infty$, all such distributions converge to the standard Gaussian distribution. In contrast, we study the proportional asymptotics, whereby $n,d\to\infty$ with $n/d\to\alpha\in(0,\infty)$. In this case, the projection of the data points along a typical random subspace is again Gaussian, but the set $\mathscr{F}_{m,\alpha}$ of all probability distributions that are asymptotically feasible as $m$-dimensional projections contains non-Gaussian distributions corresponding to exceptional subspaces. Non-rigorous methods from statistical physics yield an indirect characterization of $\mathscr{F}_{m,\alpha}$ in terms of a generalized Parisi formula. Motivated by the goal of putting this formula on a rigorous basis, and to understand whether these projections can be found efficiently, we study the subset $\mathscr{F}^{\rm alg}_{m,\alpha}\subseteq\mathscr{F}_{m,\alpha}$ of distributions that can be realized by a class of iterative algorithms. We prove that this set is characterized by a certain stochastic optimal control problem, and obtain a dual characterization of this problem in terms of a variational principle that extends Parisi's formula. As a byproduct, we obtain computationally achievable values for a class of random optimization problems including 'generalized spherical perceptron' models.
Submitted 5 June, 2024; originally announced June 2024.
Comments: 83 pages.

10. arXiv:2405.13818 [pdf, other] (eess.SY, cs.LG, math.DS, math.OC)
Identifiability of Differential-Algebraic Systems
Authors: Arthur N. Montanari, François Lamoline, Robert Bereza, Jorge Gonçalves
Abstract: Data-driven modeling of dynamical systems often faces numerous data-related challenges. A fundamental requirement is the existence of a unique set of parameters for a chosen model structure, an issue commonly referred to as identifiability. Although this problem is well studied for ordinary differential equations (ODEs), few studies have focused on the more general class of systems described by differential-algebraic equations (DAEs). Examples of DAEs include dynamical systems with algebraic equations representing conservation laws or approximating fast dynamics. This work introduces a novel identifiability test for models characterized by nonlinear DAEs. Unlike previous approaches, our test only requires prior knowledge of the system equations and does not need nonlinear transformation, index reduction, or numerical integration of the DAEs. We employed our identifiability analysis across a diverse range of DAE models, illustrating how system identifiability depends on the choices of sensors, experimental conditions, and model structures. Given the added challenges involved in identifying DAEs when compared to ODEs, we anticipate that our findings will have broad applicability and contribute significantly to the development and validation of data-driven methods for DAEs and other structure-preserving models.
Submitted 22 May, 2024; originally announced May 2024.
Comments: Codes available at https://github.com/montanariarthur/IdentifiabilityDAE

11. arXiv:2405.01735 [pdf, ps, other] (cs.DS, math.NA, math.OC, math.PR)
On Smale's 17th problem over the reals
Authors: Andrea Montanari, Eliran Subag
Abstract: We consider the problem of efficiently solving a system of $n$ non-linear equations in ${\mathbb R}^d$. Addressing Smale's 17th problem stated in 1998, we consider a setting whereby the $n$ equations are random homogeneous polynomials of arbitrary degrees. In the complex case and for $n=d-1$, Beltrán and Pardo proved the existence of an efficient randomized algorithm and Lairez recently showed it can be de-randomized to produce a deterministic efficient algorithm. Here we consider the real setting, to which previously developed methods do not apply. We describe a polynomial time algorithm that finds solutions (with high probability) for $n=d-O(\sqrt{d\log d})$ if the maximal degree is bounded by $d^2$ and for $n=d-1$ if the maximal degree is larger than $d^2$.
Submitted 9 December, 2024; v1 submitted 2 May, 2024; originally announced May 2024.
Comments: 49 pages.

12. arXiv:2402.04376 [pdf, other] (cs.LG, cs.AI, stat.ML)
Scaling laws for learning with real and surrogate data
Authors: Ayush Jain, Andrea Montanari, Eren Sasoglu
Abstract: Collecting large quantities of high-quality data can be prohibitively expensive or impractical, and a bottleneck in machine learning. One may instead augment a small set of $n$ data points from the target distribution with data from more accessible sources, e.g. data collected under different circumstances or synthesized by generative models. We refer to such data as 'surrogate data'. We study a weighted empirical risk minimization (ERM) approach for integrating surrogate data into training. We analyze this method mathematically under several classical statistical models, and validate our findings empirically on datasets from different domains. Our main findings are: $(i)$ Integrating surrogate data can significantly reduce the test error on the original distribution. Surprisingly, this can happen even when the surrogate data is unrelated to the original ones. We trace back this behavior to the classical Stein's paradox. $(ii)$ In order to reap the benefit of surrogate data, it is crucial to use optimally weighted ERM. $(iii)$ The test error of models trained on mixtures of real and surrogate data is approximately described by a scaling law. This scaling law can be used to predict the optimal weighting scheme, and to choose the amount of surrogate data to add.
Submitted 3 December, 2024; v1 submitted 6 February, 2024; originally announced February 2024.
Comments: Added new experiment and minor changes.
</p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Added new experiment and minor changes</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2401.09860">arXiv:2401.09860</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2401.09860">pdf</a>, <a href="https://arxiv.org/format/2401.09860">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Logic in Computer Science">cs.LO</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Formal Languages and Automata Theory">cs.FL</span> </div> </div> <p class="title is-5 mathjax"> Succinctness of Cosafety Fragments of LTL via Combinatorial Proof Systems (extended version) </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Geatti%2C+L">Luca Geatti</a>, <a href="/search/cs?searchtype=author&amp;query=Mansutti%2C+A">Alessio Mansutti</a>, <a href="/search/cs?searchtype=author&amp;query=Montanari%2C+A">Angelo Montanari</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2401.09860v2-abstract-short" style="display: inline;"> This paper focuses on succinctness results for fragments of Linear Temporal Logic with Past (LTL) devoid of binary temporal operators like until, and provides methods to establish them. We prove that there is a family of cosafety languages (Ln)_{n&gt;=1} such that Ln can be expressed with a pure future formula of size O(n), but it requires formulae of size 2^惟(n) to be captured with past formulae. As&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2401.09860v2-abstract-full').style.display = 'inline'; document.getElementById('2401.09860v2-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2401.09860v2-abstract-full" style="display: none;"> This paper focuses on succinctness results for fragments of Linear Temporal Logic with Past (LTL) devoid of binary temporal operators like until, and provides methods to establish them. We prove that there is a family of cosafety languages (Ln)_{n&gt;=1} such that Ln can be expressed with a pure future formula of size O(n), but it requires formulae of size 2^惟(n) to be captured with past formulae. As a by-product, such a succinctness result shows the optimality of the pastification algorithm proposed in [Artale et al., KR, 2023]. We show that, in the considered case, succinctness cannot be proven by relying on the classical automata-based method introduced in [Markey, Bull. EATCS, 2003]. In place of this method, we devise and apply a combinatorial proof system whose deduction trees represent LTL formulae. The system can be seen as a proof-centric (one-player) view on the games used by Adler and Immerman to study the succinctness of CTL. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2401.09860v2-abstract-full').style.display = 'none'; document.getElementById('2401.09860v2-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 17 June, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 18 January, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> January 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">ACM Class:</span> F.3.1; F.4.3 </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2309.14563">arXiv:2309.14563</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2309.14563">pdf</a>, <a href="https://arxiv.org/format/2309.14563">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">stat.ML</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> Towards a statistical theory of data selection under weak supervision </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Kolossov%2C+G">Germain Kolossov</a>, <a href="/search/cs?searchtype=author&amp;query=Montanari%2C+A">Andrea Montanari</a>, <a href="/search/cs?searchtype=author&amp;query=Tandon%2C+P">Pulkit Tandon</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2309.14563v2-abstract-short" style="display: inline;"> Given a sample of size $N$, it is often useful to select a subsample of smaller size $n&lt;N$ to be used for statistical estimation or learning. Such a data selection step is useful to reduce the requirements of data labeling and the computational complexity of learning. We assume to be given $N$ unlabeled samples $\{{\boldsymbol x}_i\}_{i\le N}$, and to be given access to a `surrogate model&#39; that ca&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2309.14563v2-abstract-full').style.display = 'inline'; document.getElementById('2309.14563v2-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2309.14563v2-abstract-full" style="display: none;"> Given a sample of size $N$, it is often useful to select a subsample of smaller size $n&lt;N$ to be used for statistical estimation or learning. Such a data selection step is useful to reduce the requirements of data labeling and the computational complexity of learning. We assume to be given $N$ unlabeled samples $\{{\boldsymbol x}_i\}_{i\le N}$, and to be given access to a `surrogate model&#39; that can predict labels $y_i$ better than random guessing. Our goal is to select a subset of the samples, to be denoted by $\{{\boldsymbol x}_i\}_{i\in G}$, of size $|G|=n&lt;N$. We then acquire labels for this set and we use them to train a model via regularized empirical risk minimization. 
By using a mixture of numerical experiments on real and synthetic data, and mathematical derivations under low- and high- dimensional asymptotics, we show that: $(i)$~Data selection can be very effective, in particular beating training on the full sample in some cases; $(ii)$~Certain popular choices in data selection methods (e.g. unbiased reweighted subsampling, or influence function-based subsampling) can be substantially suboptimal. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2309.14563v2-abstract-full').style.display = 'none'; document.getElementById('2309.14563v2-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 4 October, 2023; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 25 September, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> September 2023. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">55 pages; 14 figures</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2308.13431">arXiv:2308.13431</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2308.13431">pdf</a>, <a href="https://arxiv.org/format/2308.13431">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">stat.ML</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Statistics Theory">math.ST</span> </div> </div> <p class="title is-5 mathjax"> Six Lectures on Linearized Neural Networks </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Misiakiewicz%2C+T">Theodor Misiakiewicz</a>, <a href="/search/cs?searchtype=author&amp;query=Montanari%2C+A">Andrea Montanari</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2308.13431v1-abstract-short" style="display: inline;"> In these six lectures, we examine what can be learnt about the behavior of multi-layer neural networks from the analysis of linear models. We first recall the correspondence between neural networks and linear models via the so-called lazy regime. We then review four models for linearized neural networks: linear regression with concentrated features, kernel ridge regression, random feature model an&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2308.13431v1-abstract-full').style.display = 'inline'; document.getElementById('2308.13431v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2308.13431v1-abstract-full" style="display: none;"> In these six lectures, we examine what can be learnt about the behavior of multi-layer neural networks from the analysis of linear models. We first recall the correspondence between neural networks and linear models via the so-called lazy regime. 
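A minimal sketch of surrogate-guided selection; margin-based subsampling is one popular scheme of the kind the paper analyzes, and `surrogate_proba` and `oracle_labels` are hypothetical callables standing in for the surrogate model and the labeling step.

```python
# Select the n points the surrogate model is least sure about, label only
# those, and fit a model on them (illustrative; not the paper's exact schemes).
import numpy as np
from sklearn.linear_model import LogisticRegression

def select_and_train(X_unlabeled, surrogate_proba, n, oracle_labels):
    margin = np.abs(surrogate_proba(X_unlabeled) - 0.5)  # small = uncertain
    G = np.argsort(margin)[:n]                           # n most uncertain points
    y_G = oracle_labels(G)                               # acquire labels for G only
    return LogisticRegression(C=1.0).fit(X_unlabeled[G], y_G)
```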
arXiv:2308.13431 [pdf, other] (https://arxiv.org/abs/2308.13431)
Subjects: Machine Learning (stat.ML); Machine Learning (cs.LG); Statistics Theory (math.ST)
Title: Six Lectures on Linearized Neural Networks
Authors: Theodor Misiakiewicz, Andrea Montanari
Abstract: In these six lectures, we examine what can be learnt about the behavior of multi-layer neural networks from the analysis of linear models. We first recall the correspondence between neural networks and linear models via the so-called lazy regime. We then review four models for linearized neural networks: linear regression with concentrated features, kernel ridge regression, the random feature model, and the neural tangent model. Finally, we highlight the limitations of the linear theory and discuss how other approaches can overcome them.
Submitted 25 August, 2023; originally announced August 2023.
Comments: 77 pages, 8 figures
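Of the four models, the random feature model is the quickest to write down: fit ridge regression on features produced by a frozen random first layer. A self-contained sketch, with toy data and hyperparameters of our choosing:

```python
# Random feature regression: relu(W x) features with frozen random W.
import numpy as np

rng = np.random.default_rng(0)
n, d, N = 200, 20, 500                         # samples, input dim, random features
X = rng.standard_normal((n, d))
y = np.sin(X[:, 0])                            # toy target

W = rng.standard_normal((N, d)) / np.sqrt(d)   # frozen first-layer weights
Phi = np.maximum(W @ X.T, 0.0).T               # n x N random ReLU features
lam = 1e-3
a = np.linalg.solve(Phi.T @ Phi + lam * np.eye(N), Phi.T @ y)  # ridge on top layer
train_mse = np.mean((Phi @ a - y) ** 2)
print(train_mse)
```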
arXiv:2307.12289 [pdf, other] (https://arxiv.org/abs/2307.12289)
Subjects: Artificial Intelligence (cs.AI)
DOI: 10.46298/lmcs-20(3:17)2024
Title: Controller Synthesis for Timeline-based Games
Authors: Renato Acampora, Luca Geatti, Nicola Gigante, Angelo Montanari, Valentino Picotti
Abstract: In the timeline-based approach to planning, the evolution over time of a set of state variables (the timelines) is governed by a set of temporal constraints. Traditional timeline-based planning systems excel at integrating planning with execution by handling temporal uncertainty. In order to handle general nondeterminism as well, the concept of timeline-based games has recently been introduced. It has been proved that deciding whether a winning strategy exists for such games is 2EXPTIME-complete. However, a concrete approach to synthesizing controllers implementing such strategies is missing. This paper fills this gap by providing an effective and computationally optimal approach to controller synthesis for timeline-based games.
Submitted 26 August, 2024; v1 submitted 23 July, 2023; originally announced July 2023.
Comments: arXiv admin note: text overlap with arXiv:2209.10319
Journal ref: Logical Methods in Computer Science, Volume 20, Issue 3 (August 27, 2024) lmcs:11639

arXiv:2305.10690 [pdf, other] (https://arxiv.org/abs/2305.10690)
Subjects: Machine Learning (cs.LG)
Title: Sampling, Diffusions, and Stochastic Localization
Authors: Andrea Montanari
Abstract: Diffusions are a successful technique to sample from high-dimensional distributions that can be either explicitly given or learnt from a collection of samples. They implement a diffusion process whose endpoint is a sample from the target distribution and whose drift is typically represented as a neural network. Stochastic localization is a successful technique to prove mixing of Markov chains and other functional inequalities in high dimension. An algorithmic version of stochastic localization was introduced in [EAMS2022] to obtain an algorithm that samples from certain statistical mechanics models. These notes have three objectives: (i) generalize the construction of [EAMS2022] to other stochastic localization processes; (ii) clarify the connection between diffusions and stochastic localization: in particular, we show that standard denoising diffusions are stochastic localizations, but that other examples are naturally suggested by the proposed viewpoint; (iii) describe some insights that follow from this viewpoint.
Submitted 18 May, 2023; originally announced May 2023.
Comments: 31 pages, 5 pdf figures
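For context, one concrete stochastic localization process used in this line of work (notation ours, a standard choice rather than necessarily the notes' own) observes the target $x \sim \mu$ through a Gaussian channel whose signal-to-noise ratio grows with time:

$$
y_t = t\,x + B_t, \qquad
\mathrm{d}y_t = m(y_t, t)\,\mathrm{d}t + \mathrm{d}W_t, \qquad
m(y, t) := \mathbb{E}\left[\, x \mid y_t = y \,\right].
$$

As $t\to\infty$, the posterior $\mu_t = \mathrm{Law}(x \mid y_t)$ localizes on a single point distributed according to $\mu$, so running the SDE with a learned approximation of the posterior mean $m$ yields a sampler; a deterministic time change relates this process to standard denoising diffusions.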
arXiv:2304.11483 [pdf, ps, other] (https://arxiv.org/abs/2304.11483)
Subjects: Logic in Computer Science (cs.LO)
Title: The Logic of Prefixes and Suffixes is Elementary under Homogeneity
Authors: Dario Della Monica, Angelo Montanari, Gabriele Puppis, Pietro Sala
Abstract: In this paper, we study the finite satisfiability problem for the logic BE under the homogeneity assumption. BE is the cornerstone of Halpern and Shoham's interval temporal logic, and features modal operators corresponding to the prefix (a.k.a. "Begins") and suffix (a.k.a. "Ends") relations on intervals. In terms of complexity, BE lies between the "Chop logic C", whose satisfiability problem is known to be non-elementary, and the PSPACE-complete interval logic D of the sub-interval (a.k.a. "During") relation. BE was shown to be EXPSPACE-hard, and the only known satisfiability procedure is primitive recursive, but not elementary. Our contribution consists of tightening the complexity bounds of the satisfiability problem for BE, by proving it to be EXPSPACE-complete. We do so by devising an equi-satisfiable normal form with boundedly many nested modalities. The normalization technique resembles Scott's quantifier elimination, but it turns out to be much more involved due to the limitations enforced by the homogeneity assumption.
Submitted 22 April, 2023; originally announced April 2023.

arXiv:2303.00055 [pdf, other] (https://arxiv.org/abs/2303.00055)
Subjects: Machine Learning (cs.LG); Optimization and Control (math.OC); Machine Learning (stat.ML)
DOI: 10.1007/s10208-024-09664-9
Title: Learning time-scales in two-layers neural networks
Authors: Raphaël Berthier, Andrea Montanari, Kangjie Zhou
Abstract: Gradient-based learning in multi-layer neural networks displays a number of striking features. In particular, the decrease rate of the empirical risk is non-monotone even after averaging over large batches. Long plateaus in which one observes barely any progress alternate with intervals of rapid decrease. These successive phases of learning often take place on very different time scales. Finally, models learnt in an early phase are typically 'simpler' or 'easier to learn', although in a way that is difficult to formalize. Although theoretical explanations of these phenomena have been put forward, each of them captures at best certain specific regimes. In this paper, we study the gradient flow dynamics of a wide two-layer neural network in high dimension, when data are distributed according to a single-index model (i.e., the target function depends on a one-dimensional projection of the covariates). Based on a mixture of new rigorous results, non-rigorous mathematical derivations, and numerical simulations, we propose a scenario for the learning dynamics in this setting. In particular, the proposed evolution exhibits separation of timescales and intermittency. These behaviors arise naturally because the population gradient flow can be recast as a singularly perturbed dynamical system.
Submitted 9 March, 2025; v1 submitted 28 February, 2023; originally announced March 2023.
Comments: 64 pages, 15 figures, Found Comput Math (2024)
MSC Class: 34E15; 37N40; 68T07
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2302.09780v1-abstract-full').style.display = 'none'; document.getElementById('2302.09780v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 20 February, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2023. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">45 pages; 6 pdf figures</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2302.05179">arXiv:2302.05179</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2302.05179">pdf</a>, <a href="https://arxiv.org/format/2302.05179">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Neural and Evolutionary Computing">cs.NE</span> </div> <div class="is-inline-block" style="margin-left: 0.5rem"> <div class="tags has-addons"> <span class="tag is-dark is-size-7">doi</span> <span class="tag is-light is-size-7"><a class="" href="https://doi.org/10.1016/j.artmed.2021.102133">10.1016/j.artmed.2021.102133 <i class="fa fa-external-link" aria-hidden="true"></i></a></span> </div> </div> </div> <p class="title is-5 mathjax"> AIOSA: An approach to the automatic identification of obstructive sleep apnea events based on deep learning </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Bernardini%2C+A">Andrea Bernardini</a>, <a href="/search/cs?searchtype=author&amp;query=Brunello%2C+A">Andrea Brunello</a>, <a href="/search/cs?searchtype=author&amp;query=Gigli%2C+G+L">Gian Luigi Gigli</a>, <a href="/search/cs?searchtype=author&amp;query=Montanari%2C+A">Angelo Montanari</a>, <a href="/search/cs?searchtype=author&amp;query=Saccomanno%2C+N">Nicola Saccomanno</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2302.05179v1-abstract-short" style="display: inline;"> Obstructive Sleep Apnea Syndrome (OSAS) is the most common sleep-related breathing disorder. It is caused by an increased upper airway resistance during sleep, which determines episodes of partial or complete interruption of airflow. The detection and treatment of OSAS is particularly important in stroke patients, because the presence of severe OSAS is associated with higher mortality, worse neuro&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2302.05179v1-abstract-full').style.display = 'inline'; document.getElementById('2302.05179v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2302.05179v1-abstract-full" style="display: none;"> Obstructive Sleep Apnea Syndrome (OSAS) is the most common sleep-related breathing disorder. It is caused by an increased upper airway resistance during sleep, which determines episodes of partial or complete interruption of airflow. 
arXiv:2302.05179 [pdf, other] (https://arxiv.org/abs/2302.05179)
Subjects: Machine Learning (cs.LG); Neural and Evolutionary Computing (cs.NE)
DOI: 10.1016/j.artmed.2021.102133
Title: AIOSA: An approach to the automatic identification of obstructive sleep apnea events based on deep learning
Authors: Andrea Bernardini, Andrea Brunello, Gian Luigi Gigli, Angelo Montanari, Nicola Saccomanno
Abstract: Obstructive Sleep Apnea Syndrome (OSAS) is the most common sleep-related breathing disorder. It is caused by increased upper airway resistance during sleep, which determines episodes of partial or complete interruption of airflow. The detection and treatment of OSAS is particularly important in stroke patients, because the presence of severe OSAS is associated with higher mortality, worse neurological deficits, worse functional outcome after rehabilitation, and a higher likelihood of uncontrolled hypertension. The gold standard test for diagnosing OSAS is polysomnography (PSG). Unfortunately, performing a PSG in an electrically hostile environment, such as a stroke unit, on neurologically impaired patients is a difficult task; moreover, the number of strokes per day outnumbers the availability of polysomnographs and dedicated healthcare professionals. Thus, a simple and automated recognition system to identify OSAS among acute stroke patients, relying on routinely recorded vital signs, is desirable. The majority of the work done so far focuses on data recorded in ideal conditions and on highly selected patients, and is thus hardly exploitable in the real-life settings where it would be of actual use. In this paper, we propose a convolutional deep learning architecture able to reduce the temporal resolution of raw waveform data, such as physiological signals, extracting key features that can be used for further processing. We exploit models based on this architecture to detect OSAS events in stroke unit recordings obtained from the monitoring of unselected patients. Unlike existing approaches, annotations are performed at one-second granularity, allowing physicians to better interpret the model outcome. Results are considered satisfactory by the domain experts. Moreover, based on a widely used benchmark, we show that the proposed approach outperforms current state-of-the-art solutions.
Submitted 10 February, 2023; originally announced February 2023.
Comments: Final article published in the Artificial Intelligence in Medicine journal
Journal ref: Artificial Intelligence in Medicine, Volume 118, 2021
arXiv:2211.14913 [pdf, ps, other] (https://arxiv.org/abs/2211.14913)
Subjects: Logic in Computer Science (cs.LO)
Title: Complexity of Safety and coSafety Fragments of Linear Temporal Logic
Authors: Alessandro Artale, Luca Geatti, Nicola Gigante, Andrea Mazzullo, Angelo Montanari
Abstract: Linear Temporal Logic (LTL) is the de-facto standard temporal logic for system specification, whose foundational properties have been studied for over five decades. Safety and cosafety properties define notable fragments of LTL, where a prefix of a trace suffices to establish whether a formula is true or not over that trace. In this paper, we study the complexity of the problems of satisfiability, validity, and realizability over infinite and finite traces for the safety and cosafety fragments of LTL. As for satisfiability and validity over infinite traces, we prove that the majority of the fragments have the same complexity as full LTL, that is, they are PSPACE-complete. The picture is radically different for realizability: we find fragments with the same expressive power whose complexity varies from 2EXPTIME-complete (as full LTL) to EXPTIME-complete. Notably, for all cosafety fragments, the complexity of the three problems does not change when passing from infinite to finite traces, while for all safety fragments the complexity of satisfiability (resp., realizability) over finite traces drops to NP-complete (resp., $\Pi^P_2$-complete).
Submitted 12 March, 2025; v1 submitted 27 November, 2022; originally announced November 2022.

arXiv:2209.10319 [pdf, ps, other] (https://arxiv.org/abs/2209.10319)
Subjects: Artificial Intelligence (cs.AI)
DOI: 10.4204/EPTCS.370.9
Title: Controller Synthesis for Timeline-based Games
Authors: Renato Acampora, Luca Geatti, Nicola Gigante, Angelo Montanari, Valentino Picotti
Abstract: In the timeline-based approach to planning, originally born in the space sector, the evolution over time of a set of state variables (the timelines) is governed by a set of temporal constraints. Traditional timeline-based planning systems excel at integrating planning with execution by handling temporal uncertainty. In order to handle general nondeterminism as well, the concept of timeline-based games has recently been introduced. It has been proved that deciding whether a winning strategy exists for such games is 2EXPTIME-complete. However, a concrete approach to synthesizing controllers implementing such strategies is missing. This paper fills this gap, outlining an approach to controller synthesis for timeline-based games.
Submitted 21 September, 2022; originally announced September 2022.
Comments: In Proceedings GandALF 2022, arXiv:2209.09333
Journal ref: EPTCS 370, 2022, pp. 131-146

arXiv:2209.02307 [pdf, other] (https://arxiv.org/abs/2209.02307)
Subjects: Artificial Intelligence (cs.AI); Logic in Computer Science (cs.LO)
DOI: 10.46298/lmcs-19(3:13)2023
Title: A first-order logic characterization of safety and co-safety languages
Authors: Alessandro Cimatti, Luca Geatti, Nicola Gigante, Angelo Montanari, Stefano Tonetta
Abstract: Linear Temporal Logic (LTL) is one of the most popular temporal logics, and comes into play in a variety of branches of computer science. Among the various reasons for its widespread use are its strong foundational properties: LTL is equivalent to counter-free omega-automata, to star-free omega-regular expressions, and (by Kamp's theorem) to the First-Order Theory of Linear Orders (FO-TLO). Safety and co-safety languages, where a finite prefix suffices to establish whether a word does not belong or belongs to the language, respectively, play a crucial role in lowering the complexity of problems like model checking and reactive synthesis for LTL. SafetyLTL (resp., coSafetyLTL) is a fragment of LTL where only universal (resp., existential) temporal modalities are allowed, which recognises safety (resp., co-safety) languages only. The main contribution of this paper is the introduction of a fragment of FO-TLO, called SafetyFO, and of its dual coSafetyFO, which are expressively complete with respect to the LTL-definable safety and co-safety languages. We prove that they exactly characterize SafetyLTL and coSafetyLTL, respectively, a result that complements Kamp's theorem and provides a clearer view of the characterization of (fragments of) LTL in terms of first-order languages. In addition, it gives a direct, compact, and self-contained proof that any safety language definable in LTL is definable in SafetyLTL as well. As a by-product, we obtain some interesting results on the expressive power of the weak tomorrow operator of SafetyLTL, interpreted over finite and infinite words. Moreover, we prove that, when interpreted over finite words, SafetyLTL (resp., coSafetyLTL) devoid of the tomorrow (resp., weak tomorrow) operator captures the safety (resp., co-safety) fragment of LTL over finite words.
Submitted 9 August, 2023; v1 submitted 6 September, 2022; originally announced September 2022.
Journal ref: Logical Methods in Computer Science, Volume 19, Issue 3 (August 10, 2023) lmcs:10061
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2206.06526v1-abstract-full').style.display = 'none'; document.getElementById('2206.06526v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 13 June, 2022; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> June 2022. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">53 pages, 1 figure, an earlier version of this paper was accepted for presentation at the Conference on Learning Theory (COLT) 2022</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2203.17209">arXiv:2203.17209</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2203.17209">pdf</a>, <a href="https://arxiv.org/ps/2203.17209">ps</a>, <a href="https://arxiv.org/format/2203.17209">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Cryptography and Security">cs.CR</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Statistics Theory">math.ST</span> </div> </div> <p class="title is-5 mathjax"> Adversarial Examples in Random Neural Networks with General Activations </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Montanari%2C+A">Andrea Montanari</a>, <a href="/search/cs?searchtype=author&amp;query=Wu%2C+Y">Yuchen Wu</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2203.17209v2-abstract-short" style="display: inline;"> A substantial body of empirical work documents the lack of robustness in deep learning models to adversarial examples. Recent theoretical work proved that adversarial examples are ubiquitous in two-layers networks with sub-exponential width and ReLU or smooth activations, and multi-layer ReLU networks with sub-exponential width. We present a result of the same type, with no restriction on width an&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2203.17209v2-abstract-full').style.display = 'inline'; document.getElementById('2203.17209v2-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2203.17209v2-abstract-full" style="display: none;"> A substantial body of empirical work documents the lack of robustness in deep learning models to adversarial examples. Recent theoretical work proved that adversarial examples are ubiquitous in two-layers networks with sub-exponential width and ReLU or smooth activations, and multi-layer ReLU networks with sub-exponential width. We present a result of the same type, with no restriction on width and for general locally Lipschitz continuous activations. 
More precisely, given a neural network $f(\,\cdot\,;{\boldsymbol θ})$ with random weights ${\boldsymbol θ}$, and feature vector ${\boldsymbol x}$, we show that an adversarial example ${\boldsymbol x}&#39;$ can be found with high probability along the direction of the gradient $\nabla_{\boldsymbol x}f({\boldsymbol x};{\boldsymbol θ})$. Our proof is based on a Gaussian conditioning technique. Instead of proving that $f$ is approximately linear in a neighborhood of ${\boldsymbol x}$, we characterize the joint distribution of $f({\boldsymbol x};{\boldsymbol θ})$ and $f({\boldsymbol x}&#39;;{\boldsymbol θ})$ for ${\boldsymbol x}&#39; = {\boldsymbol x}-s({\boldsymbol x})\nabla_{\boldsymbol x}f({\boldsymbol x};{\boldsymbol θ})$. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2203.17209v2-abstract-full').style.display = 'none'; document.getElementById('2203.17209v2-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 22 January, 2023; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 31 March, 2022; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> March 2022. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">36 pages</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2203.06396">arXiv:2203.06396</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2203.06396">pdf</a>, <a href="https://arxiv.org/format/2203.06396">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> </div> </div> <p class="title is-5 mathjax"> A combined approach to the analysis of speech conversations in a contact center domain </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Brunello%2C+A">Andrea Brunello</a>, <a href="/search/cs?searchtype=author&amp;query=Marzano%2C+E">Enrico Marzano</a>, <a href="/search/cs?searchtype=author&amp;query=Montanari%2C+A">Angelo Montanari</a>, <a href="/search/cs?searchtype=author&amp;query=Sciavicco%2C+G">Guido Sciavicco</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2203.06396v1-abstract-short" style="display: inline;"> The ever more accurate search for deep analysis in customer data is a really strong technological trend nowadays, quite appealing to both private and public companies. This is particularly true in the contact center domain, where speech analytics is an extremely powerful methodology for gaining insights from unstructured data, coming from customer and human agent conversations. 
In this work, we de&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2203.06396v1-abstract-full').style.display = 'inline'; document.getElementById('2203.06396v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2203.06396v1-abstract-full" style="display: none;"> The ever more accurate search for deep analysis in customer data is a really strong technological trend nowadays, quite appealing to both private and public companies. This is particularly true in the contact center domain, where speech analytics is an extremely powerful methodology for gaining insights from unstructured data, coming from customer and human agent conversations. In this work, we describe an experiment with a speech analytics process for an Italian contact center that deals with call recordings extracted from inbound or outbound flows. First, we illustrate in detail the development of an in-house speech-to-text solution, based on the Kaldi framework, and evaluate its performance (and compare it to Google Cloud Speech API). Then, we evaluate and compare different approaches to the semantic tagging of call transcripts, ranging from classic regular expressions to machine learning models based on ngrams and logistic regression, and propose a combination of them, which is shown to provide a consistent benefit. Finally, a decision tree inducer, called J48S, is applied to the problem of tagging. Such an algorithm is natively capable of exploiting sequential data, such as texts, for classification purposes. The solution is compared with the other approaches and is shown to provide competitive classification performance, while generating highly interpretable models and reducing the complexity of the data preparation phase. The potential operational impact of the whole process is thoroughly examined. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2203.06396v1-abstract-full').style.display = 'none'; document.getElementById('2203.06396v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 12 March, 2022; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> March 2022. 
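<p class="is-size-7">A rough illustration of the tagger combination described in this abstract (a hypothetical sketch: the data, regular expressions, and labels below are invented, and the authors' actual pipeline is built on Kaldi and J48S): regex hits can be appended as extra features to an n-gram representation feeding a logistic-regression classifier.</p> <pre><code>import re
import numpy as np
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.linear_model import LogisticRegression

# Invented rules for illustration; a real deployment would use curated patterns.
RULES = {"greeting": re.compile(r"\b(buongiorno|salve)\b"),
         "farewell": re.compile(r"\b(arrivederci|grazie)\b")}

def regex_features(texts):
    # one binary feature per rule: did the pattern fire on this transcript?
    return np.array([[int(bool(p.search(t))) for p in RULES.values()]
                     for t in texts])

texts = ["buongiorno, come posso aiutarla", "grazie mille, arrivederci"]
labels = ["opening", "closing"]               # invented tags

vec = CountVectorizer(ngram_range=(1, 2))     # unigrams + bigrams
X = np.hstack([vec.fit_transform(texts).toarray(), regex_features(texts)])
clf = LogisticRegression(max_iter=1000).fit(X, labels)
print(clf.predict(X))
</code></pre>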
</p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2203.05093">arXiv:2203.05093</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2203.05093">pdf</a>, <a href="https://arxiv.org/ps/2203.05093">ps</a>, <a href="https://arxiv.org/format/2203.05093">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Probability">math.PR</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Disordered Systems and Neural Networks">cond-mat.dis-nn</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Data Structures and Algorithms">cs.DS</span> </div> </div> <p class="title is-5 mathjax"> Sampling from the Sherrington-Kirkpatrick Gibbs measure via algorithmic stochastic localization </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Alaoui%2C+A+E">Ahmed El Alaoui</a>, <a href="/search/cs?searchtype=author&amp;query=Montanari%2C+A">Andrea Montanari</a>, <a href="/search/cs?searchtype=author&amp;query=Sellke%2C+M">Mark Sellke</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2203.05093v2-abstract-short" style="display: inline;"> We consider the Sherrington-Kirkpatrick model of spin glasses at high temperature and no external field, and study the problem of sampling from the Gibbs distribution $μ$ in polynomial time. We prove that, for any inverse temperature $β&lt;1/2$, there exists an algorithm with complexity $O(n^2)$ that samples from a distribution $μ^{alg}$ which is close in normalized Wasserstein distance to $μ$. Namel&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2203.05093v2-abstract-full').style.display = 'inline'; document.getElementById('2203.05093v2-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2203.05093v2-abstract-full" style="display: none;"> We consider the Sherrington-Kirkpatrick model of spin glasses at high temperature and no external field, and study the problem of sampling from the Gibbs distribution $μ$ in polynomial time. We prove that, for any inverse temperature $β&lt;1/2$, there exists an algorithm with complexity $O(n^2)$ that samples from a distribution $μ^{alg}$ which is close in normalized Wasserstein distance to $μ$. Namely, there exists a coupling of $μ$ and $μ^{alg}$ such that if $(x,x^{alg})\in\{-1,+1\}^n\times \{-1,+1\}^n$ is a pair drawn from this coupling, then $n^{-1}\mathbb E\{||x-x^{alg}||_2^2\}=o_n(1)$. The best previous results, by Bauerschmidt and Bodineau and by Eldan, Koehler, and Zeitouni, implied efficient algorithms to approximately sample (under a stronger metric) for $β&lt;1/4$. We complement this result with a negative one, by introducing a suitable &#34;stability&#34; property for sampling algorithms, which is verified by many standard techniques. We prove that no stable algorithm can approximately sample for $β&gt;1$, even under the normalized Wasserstein metric. Our sampling method is based on an algorithmic implementation of stochastic localization, which progressively tilts the measure $μ$ towards a single configuration, together with an approximate message passing algorithm that is used to approximate the mean of the tilted measure. 
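<p class="is-size-7">A schematic and heavily simplified sketch of the sampling strategy sketched in this abstract (not the authors' implementation): the outer loop is an Euler discretization of stochastic localization, which drives an external field $y$; the inner routine, which in the paper is an approximate message passing (AMP) algorithm with precise guarantees, is replaced here by a naive mean-field fixed-point iteration for the mean of the tilted measure.</p> <pre><code>import numpy as np

rng = np.random.default_rng(1)
n, beta, dt, steps = 200, 0.3, 0.02, 300
A = rng.standard_normal((n, n))
J = (A + A.T) / np.sqrt(2 * n)            # symmetric Gaussian couplings, variance ~ 1/n

def tilted_mean(y, iters=100):
    """Naive stand-in for AMP: damped mean-field fixed point for the tilted measure."""
    m = np.tanh(y)
    for _ in range(iters):
        m = 0.5 * m + 0.5 * np.tanh(beta * (J @ m) + y)
    return m

y = np.zeros(n)                           # external field of the localization process
for _ in range(steps):                    # Euler scheme for dy = m dt + dW
    m = tilted_mean(y)
    y += m * dt + np.sqrt(dt) * rng.standard_normal(n)

# Round to spins by sampling each coordinate with the estimated tilted mean.
x = np.where(np.less(rng.random(n), (1 + tilted_mean(y)) / 2), 1, -1)
print(x[:10])
</code></pre>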
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2203.05093v2-abstract-full').style.display = 'none'; document.getElementById('2203.05093v2-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 15 February, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 9 March, 2022; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> March 2022. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2202.08832">arXiv:2202.08832</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2202.08832">pdf</a>, <a href="https://arxiv.org/format/2202.08832">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Statistics Theory">math.ST</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">stat.ML</span> </div> </div> <p class="title is-5 mathjax"> Universality of empirical risk minimization </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Montanari%2C+A">Andrea Montanari</a>, <a href="/search/cs?searchtype=author&amp;query=Saeed%2C+B">Basil Saeed</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2202.08832v2-abstract-short" style="display: inline;"> Consider supervised learning from i.i.d. samples $\{{\boldsymbol x}_i,y_i\}_{i\le n}$ where ${\boldsymbol x}_i \in\mathbb{R}^p$ are feature vectors and ${y} \in \mathbb{R}$ are labels. We study empirical risk minimization over a class of functions that are parameterized by $\mathsf{k} = O(1)$ vectors ${\boldsymbol 胃}_1, . . . , {\boldsymbol 胃}_{\mathsf k} \in \mathbb{R}^p$ , and prove universality&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2202.08832v2-abstract-full').style.display = 'inline'; document.getElementById('2202.08832v2-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2202.08832v2-abstract-full" style="display: none;"> Consider supervised learning from i.i.d. samples $\{{\boldsymbol x}_i,y_i\}_{i\le n}$ where ${\boldsymbol x}_i \in\mathbb{R}^p$ are feature vectors and ${y} \in \mathbb{R}$ are labels. We study empirical risk minimization over a class of functions that are parameterized by $\mathsf{k} = O(1)$ vectors ${\boldsymbol 胃}_1, . . . , {\boldsymbol 胃}_{\mathsf k} \in \mathbb{R}^p$ , and prove universality results both for the training and test error. Namely, under the proportional asymptotics $n,p\to\infty$, with $n/p = 螛(1)$, we prove that the training error depends on the random features distribution only through its covariance structure. Further, we prove that the minimum test error over near-empirical risk minimizers enjoys similar universality properties. In particular, the asymptotics of these quantities can be computed $-$to leading order$-$ under a simpler model in which the feature vectors ${\boldsymbol x}_i$ are replaced by Gaussian vectors ${\boldsymbol g}_i$ with the same covariance. 
Earlier universality results were limited to strongly convex learning procedures, or to feature vectors ${\boldsymbol x}_i$ with independent entries. Our results do not make any of these assumptions. Our assumptions are general enough to include feature vectors ${\boldsymbol x}_i$ that are produced by randomized featurization maps. In particular we explicitly check the assumptions for certain random features models (computing the output of a one-layer neural network with random weights) and neural tangent models (first-order Taylor approximation of two-layer networks). <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2202.08832v2-abstract-full').style.display = 'none'; document.getElementById('2202.08832v2-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 31 October, 2022; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 17 February, 2022; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2022. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">74 pages</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2202.07881">arXiv:2202.07881</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2202.07881">pdf</a>, <a href="https://arxiv.org/format/2202.07881">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Logic in Computer Science">cs.LO</span> </div> <div class="is-inline-block" style="margin-left: 0.5rem"> <div class="tags has-addons"> <span class="tag is-dark is-size-7">doi</span> <span class="tag is-light is-size-7"><a class="" href="https://doi.org/10.46298/lmcs-20(1:23)2024">10.46298/lmcs-20(1:23)2024 <i class="fa fa-external-link" aria-hidden="true"></i></a></span> </div> </div> </div> <p class="title is-5 mathjax"> The addition of temporal neighborhood makes the logic of prefixes and sub-intervals EXPSPACE-complete </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Bozzelli%2C+L">L. Bozzelli</a>, <a href="/search/cs?searchtype=author&amp;query=Montanari%2C+A">A. Montanari</a>, <a href="/search/cs?searchtype=author&amp;query=Peron%2C+A">A. Peron</a>, <a href="/search/cs?searchtype=author&amp;query=Sala%2C+P">P. Sala</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2202.07881v5-abstract-short" style="display: inline;"> A classic result by Stockmeyer gives a non-elementary lower bound to the emptiness problem for star-free generalized regular expressions. This result is intimately connected to the satisfiability problem for interval temporal logic, notably for formulas that make use of the so-called chop operator. 
Such an operator can indeed be interpreted as the inverse of the concatenation operation on regular&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2202.07881v5-abstract-full').style.display = 'inline'; document.getElementById('2202.07881v5-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2202.07881v5-abstract-full" style="display: none;"> A classic result by Stockmeyer gives a non-elementary lower bound to the emptiness problem for star-free generalized regular expressions. This result is intimately connected to the satisfiability problem for interval temporal logic, notably for formulas that make use of the so-called chop operator. Such an operator can indeed be interpreted as the inverse of the concatenation operation on regular languages, and this correspondence enables reductions between non-emptiness of star-free generalized regular expressions and satisfiability of formulas of the interval temporal logic of chop under the homogeneity assumption. In this paper, we study the complexity of the satisfiability problem for suitable weakenings of the chop interval temporal logic, which can be equivalently viewed as fragments of Halpern and Shoham interval logic. We first consider the logic $\mathsf{BD}_{hom}$ featuring modalities $B$, for \emph{begins}, corresponding to the prefix relation on pairs of intervals, and $D$, for \emph{during}, corresponding to the infix relation. The homogeneous models of $\mathsf{BD}_{hom}$ naturally correspond to languages defined by restricted forms of regular expressions that use union, complementation, and the inverses of the prefix and infix relations. Such a fragment has been recently shown to be PSPACE-complete. In this paper, we study the extension of $\mathsf{BD}_{hom}$ with the temporal neighborhood modality $A$ (corresponding to the Allen relation \emph{Meets}), and prove that this addition increases both expressiveness and complexity. In particular, we show that the resulting logic $\mathsf{BDA}_{hom}$ is EXPSPACE-complete. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2202.07881v5-abstract-full').style.display = 'none'; document.getElementById('2202.07881v5-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 21 March, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 16 February, 2022; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2022. 
</p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Journal ref:</span> Logical Methods in Computer Science, Volume 20, Issue 1 (March 22, 2024) lmcs:9092 </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2111.06813">arXiv:2111.06813</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2111.06813">pdf</a>, <a href="https://arxiv.org/ps/2111.06813">ps</a>, <a href="https://arxiv.org/format/2111.06813">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Probability">math.PR</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Discrete Mathematics">cs.DM</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Mathematical Physics">math-ph</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Combinatorics">math.CO</span> </div> </div> <p class="title is-5 mathjax"> Local algorithms for Maximum Cut and Minimum Bisection on locally treelike regular graphs of large degree </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Alaoui%2C+A+E">Ahmed El Alaoui</a>, <a href="/search/cs?searchtype=author&amp;query=Montanari%2C+A">Andrea Montanari</a>, <a href="/search/cs?searchtype=author&amp;query=Sellke%2C+M">Mark Sellke</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2111.06813v2-abstract-short" style="display: inline;"> Given a graph $G$ of degree $k$ over $n$ vertices, we consider the problem of computing a near maximum cut or a near minimum bisection in polynomial time. For graphs of girth $2L$, we develop a local message passing algorithm whose complexity is $O(nkL)$, and that achieves near optimal cut values among all $L$-local algorithms. Focusing on max-cut, the algorithm constructs a cut of value&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2111.06813v2-abstract-full').style.display = 'inline'; document.getElementById('2111.06813v2-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2111.06813v2-abstract-full" style="display: none;"> Given a graph $G$ of degree $k$ over $n$ vertices, we consider the problem of computing a near maximum cut or a near minimum bisection in polynomial time. For graphs of girth $2L$, we develop a local message passing algorithm whose complexity is $O(nkL)$, and that achieves near optimal cut values among all $L$-local algorithms. Focusing on max-cut, the algorithm constructs a cut of value $nk/4+ n\mathsf{P}_\star\sqrt{k/4}+\mathsf{err}(n,k,L)$, where $\mathsf{P}_\star\approx 0.763166$ is the value of the Parisi formula from spin glass theory, and $\mathsf{err}(n,k,L)=o_n(n)+no_k(\sqrt{k})+n \sqrt{k} o_L(1)$ (subscripts indicate the asymptotic variables). Our result generalizes to locally treelike graphs, i.e., graphs whose girth becomes $2L$ after removing a small fraction of vertices. Earlier work established that, for random $k$-regular graphs, the typical max-cut value is $nk/4+ n\mathsf{P}_\star\sqrt{k/4}+o_n(n)+no_k(\sqrt{k})$. Therefore our algorithm is nearly optimal on such graphs. 
An immediate corollary of this result is that random regular graphs have nearly minimum max-cut, and nearly maximum min-bisection among all regular locally treelike graphs. This can be viewed as a combinatorial version of the near-Ramanujan property of random regular graphs. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2111.06813v2-abstract-full').style.display = 'none'; document.getElementById('2111.06813v2-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 3 February, 2023; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 12 November, 2021; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> November 2021. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Improved presentation. To appear in Random Structures and Algorithms</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2110.15824">arXiv:2110.15824</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2110.15824">pdf</a>, <a href="https://arxiv.org/format/2110.15824">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Probability">math.PR</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Statistics Theory">math.ST</span> </div> <div class="is-inline-block" style="margin-left: 0.5rem"> <div class="tags has-addons"> <span class="tag is-dark is-size-7">doi</span> <span class="tag is-light is-size-7"><a class="" href="https://doi.org/10.1007/s00440-023-01248-y">10.1007/s00440-023-01248-y <i class="fa fa-external-link" aria-hidden="true"></i></a></span> </div> </div> </div> <p class="title is-5 mathjax"> Tractability from overparametrization: The example of the negative perceptron </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Montanari%2C+A">Andrea Montanari</a>, <a href="/search/cs?searchtype=author&amp;query=Zhong%2C+Y">Yiqiao Zhong</a>, <a href="/search/cs?searchtype=author&amp;query=Zhou%2C+K">Kangjie Zhou</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2110.15824v3-abstract-short" style="display: inline;"> In the negative perceptron problem we are given $n$ data points $({\boldsymbol x}_i,y_i)$, where ${\boldsymbol x}_i$ is a $d$-dimensional vector and $y_i\in\{+1,-1\}$ is a binary label. The data are not linearly separable and hence we content ourselves with finding a linear classifier with the largest possible \emph{negative} margin. 
In other words, we want to find a unit norm vector ${\boldsymbol θ}$&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2110.15824v3-abstract-full').style.display = 'inline'; document.getElementById('2110.15824v3-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2110.15824v3-abstract-full" style="display: none;"> In the negative perceptron problem we are given $n$ data points $({\boldsymbol x}_i,y_i)$, where ${\boldsymbol x}_i$ is a $d$-dimensional vector and $y_i\in\{+1,-1\}$ is a binary label. The data are not linearly separable and hence we content ourselves with finding a linear classifier with the largest possible \emph{negative} margin. In other words, we want to find a unit norm vector ${\boldsymbol θ}$ that maximizes $\min_{i\le n}y_i\langle {\boldsymbol θ},{\boldsymbol x}_i\rangle$. This is a non-convex optimization problem (it is equivalent to finding a maximum norm vector in a polytope), and we study its typical properties under two random models for the data. We consider the proportional asymptotics in which $n,d\to \infty$ with $n/d\toδ$, and prove upper and lower bounds on the maximum margin $κ_{\text{s}}(δ)$ or -- equivalently -- on its inverse function $δ_{\text{s}}(κ)$. In other words, $δ_{\text{s}}(κ)$ is the overparametrization threshold: for $n/d\le δ_{\text{s}}(κ)-\varepsilon$ a classifier achieving vanishing training error exists with high probability, while for $n/d\ge δ_{\text{s}}(κ)+\varepsilon$ it does not. Our bounds on $δ_{\text{s}}(κ)$ match to the leading order as $κ\to -\infty$. We then analyze a linear programming algorithm to find a solution, and characterize the corresponding threshold $δ_{\text{lin}}(κ)$. We observe a gap between the interpolation threshold $δ_{\text{s}}(κ)$ and the linear programming threshold $δ_{\text{lin}}(κ)$, raising the question of the behavior of other algorithms. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2110.15824v3-abstract-full').style.display = 'none'; document.getElementById('2110.15824v3-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 3 July, 2023; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 27 October, 2021; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2021. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">107 pages; 7 pdf figures</span> </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Journal ref:</span> Probab. Theory Relat. 
Fields 188, 805-910 (2024) </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2109.08320">arXiv:2109.08320</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2109.08320">pdf</a>, <a href="https://arxiv.org/ps/2109.08320">ps</a>, <a href="https://arxiv.org/format/2109.08320">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Logic in Computer Science">cs.LO</span> </div> <div class="is-inline-block" style="margin-left: 0.5rem"> <div class="tags has-addons"> <span class="tag is-dark is-size-7">doi</span> <span class="tag is-light is-size-7"><a class="" href="https://doi.org/10.4204/EPTCS.346.12">10.4204/EPTCS.346.12 <i class="fa fa-external-link" aria-hidden="true"></i></a></span> </div> </div> </div> <p class="title is-5 mathjax"> Adding the Relation Meets to the Temporal Logic of Prefixes and Infixes makes it EXPSPACE-Complete </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Bozzelli%2C+L">Laura Bozzelli</a>, <a href="/search/cs?searchtype=author&amp;query=Montanari%2C+A">Angelo Montanari</a>, <a href="/search/cs?searchtype=author&amp;query=Peron%2C+A">Adriano Peron</a>, <a href="/search/cs?searchtype=author&amp;query=Sala%2C+P">Pietro Sala</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2109.08320v1-abstract-short" style="display: inline;"> The choice of the right trade-off between expressiveness and complexity is the main issue in interval temporal logic. In their seminal paper, Halpern and Shoham showed that the satisfiability problem for HS (the temporal logic of Allen&#39;s relations) is highly undecidable over any reasonable class of linear orders. In order to recover decidability, one can restrict the set of temporal modalities and&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2109.08320v1-abstract-full').style.display = 'inline'; document.getElementById('2109.08320v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2109.08320v1-abstract-full" style="display: none;"> The choice of the right trade-off between expressiveness and complexity is the main issue in interval temporal logic. In their seminal paper, Halpern and Shoham showed that the satisfiability problem for HS (the temporal logic of Allen&#39;s relations) is highly undecidable over any reasonable class of linear orders. In order to recover decidability, one can restrict the set of temporal modalities and/or the class of models. In the following, we focus on the satisfiability problem for HS fragments under the homogeneity assumption, according to which any proposition letter holds over an interval if and only if it holds at all its points. The problem for full HS with homogeneity has been shown to be non-elementarily decidable, but its only known lower bound is EXPSPACE (in fact, EXPSPACE-hardness has been shown for the logic of prefixes and suffixes BE, which is a very small fragment of it). The logic of prefixes and infixes BD has been recently shown to be PSPACE-complete. In this paper, we prove that the addition of the Allen relation Meets to BD makes it EXPSPACE-complete. 
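<p class="is-size-7">To make the ingredients above concrete, here is a small illustrative sketch (mine, not the paper's): intervals $[i,j]$ over a finite trace, the Allen relations behind the modalities $B$ (begins, a proper prefix), $D$ (during, a strict infix) and $A$ (meets), and the homogeneity rule for proposition letters.</p> <pre><code>def begins(i1, j1, i2, j2):   # B: [i1,j1] is a proper prefix of [i2,j2]
    return i1 == i2 and j1 &lt; j2

def during(i1, j1, i2, j2):   # D: [i1,j1] lies strictly inside [i2,j2]
    return i2 &lt; i1 and j1 &lt; j2

def meets(i1, j1, i2, j2):    # A: [i1,j1] meets [i2,j2] at its right endpoint
    return j1 == i2

def holds(trace, p, i, j):    # homogeneity: p on [i,j] iff p at every point of [i,j]
    return all(p in trace[k] for k in range(i, j + 1))

trace = [{"p"}, {"p"}, {"p", "q"}, {"q"}]
print(holds(trace, "p", 0, 2), holds(trace, "p", 0, 3))   # True False
print(begins(0, 1, 0, 3), during(1, 2, 0, 3), meets(0, 1, 1, 3))
</code></pre>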
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2109.08320v1-abstract-full').style.display = 'none'; document.getElementById('2109.08320v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 16 September, 2021; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> September 2021. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">In Proceedings GandALF 2021, arXiv:2109.07798</span> </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Journal ref:</span> EPTCS 346, 2021, pp. 179-194 </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2109.08319">arXiv:2109.08319</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2109.08319">pdf</a>, <a href="https://arxiv.org/format/2109.08319">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Formal Languages and Automata Theory">cs.FL</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Logic in Computer Science">cs.LO</span> </div> <div class="is-inline-block" style="margin-left: 0.5rem"> <div class="tags has-addons"> <span class="tag is-dark is-size-7">doi</span> <span class="tag is-light is-size-7"><a class="" href="https://doi.org/10.4204/EPTCS.346.10">10.4204/EPTCS.346.10 <i class="fa fa-external-link" aria-hidden="true"></i></a></span> </div> </div> </div> <p class="title is-5 mathjax"> Expressiveness of Extended Bounded Response LTL </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Cimatti%2C+A">Alessandro Cimatti</a>, <a href="/search/cs?searchtype=author&amp;query=Geatti%2C+L">Luca Geatti</a>, <a href="/search/cs?searchtype=author&amp;query=Gigante%2C+N">Nicola Gigante</a>, <a href="/search/cs?searchtype=author&amp;query=Montanari%2C+A">Angelo Montanari</a>, <a href="/search/cs?searchtype=author&amp;query=Tonetta%2C+S">Stefano Tonetta</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2109.08319v1-abstract-short" style="display: inline;"> Extended Bounded Response LTL with Past (LTLEBR+P) is a safety fragment of Linear Temporal Logic with Past (LTL+P) that has been recently introduced in the context of reactive synthesis. The strength of LTLEBR+P is a fully symbolic compilation of formulas into symbolic deterministic automata. Its syntax is organized in four levels. The first three levels feature (a particular combination of) futur&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2109.08319v1-abstract-full').style.display = 'inline'; document.getElementById('2109.08319v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2109.08319v1-abstract-full" style="display: none;"> Extended Bounded Response LTL with Past (LTLEBR+P) is a safety fragment of Linear Temporal Logic with Past (LTL+P) that has been recently introduced in the context of reactive synthesis. 
The strength of LTLEBR+P is a fully symbolic compilation of formulas into symbolic deterministic automata. Its syntax is organized in four levels. The first three levels feature (a particular combination of) future temporal modalities, while the last one admits only past temporal operators. This structure has algorithmic motivations: each level corresponds to a step of the algorithm for the automaton construction. The complex syntax of LTLEBR+P made it difficult to precisely characterize its expressive power, and to compare it with other LTL+P safety fragments. In this paper, we first prove that LTLEBR+P is expressively complete with respect to the safety fragment of LTL+P, that is, any safety language definable in LTL+P can be formalized in LTLEBR+P, and vice versa. From this, it follows that LTLEBR+P and Safety-LTL are expressively equivalent. Then, we show that past modalities play an essential role in LTLEBR+P: we prove that the future fragment of LTLEBR+P is strictly less expressive than full LTLEBR+P. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2109.08319v1-abstract-full').style.display = 'none'; document.getElementById('2109.08319v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 16 September, 2021; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> September 2021. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">In Proceedings GandALF 2021, arXiv:2109.07798</span> </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Journal ref:</span> EPTCS 346, 2021, pp. 152-165 </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2109.03947">arXiv:2109.03947</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2109.03947">pdf</a>, <a href="https://arxiv.org/format/2109.03947">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> SensiX++: Bringing MLOPs and Multi-tenant Model Serving to Sensory Edge Devices </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Min%2C+C">Chulhong Min</a>, <a href="/search/cs?searchtype=author&amp;query=Mathur%2C+A">Akhil Mathur</a>, <a href="/search/cs?searchtype=author&amp;query=Acer%2C+U+G">Utku Gunay Acer</a>, <a href="/search/cs?searchtype=author&amp;query=Montanari%2C+A">Alessandro Montanari</a>, <a href="/search/cs?searchtype=author&amp;query=Kawsar%2C+F">Fahim Kawsar</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2109.03947v1-abstract-short" style="display: inline;"> We present SensiX++ -- a multi-tenant runtime for adaptive model execution with integrated MLOps on edge devices, e.g., a camera, a microphone, or IoT sensors. SensiX++ operates on two fundamental principles -- highly modular componentisation to externalise data operations with clear abstractions and document-centric manifestation for system-wide orchestration. 
First, a data coordinator manages the&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2109.03947v1-abstract-full').style.display = 'inline'; document.getElementById('2109.03947v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2109.03947v1-abstract-full" style="display: none;"> We present SensiX++ -- a multi-tenant runtime for adaptive model execution with integrated MLOps on edge devices, e.g., a camera, a microphone, or IoT sensors. SensiX++ operates on two fundamental principles -- highly modular componentisation to externalise data operations with clear abstractions and document-centric manifestation for system-wide orchestration. First, a data coordinator manages the lifecycle of sensors and serves models with correct data through automated transformations. Next, a resource-aware model server executes multiple models in isolation through model abstraction, pipeline automation and feature sharing. An adaptive scheduler then orchestrates the best-effort executions of multiple models across heterogeneous accelerators, balancing latency and throughput. Finally, microservices with REST APIs serve synthesised model predictions, system statistics, and continuous deployment. Collectively, these components enable SensiX++ to serve multiple models efficiently with fine-grained control on edge devices while minimising data operation redundancy, managing data and device heterogeneity, reducing resource contention and removing manual MLOps. We benchmark SensiX++ with ten different vision and acoustic models across various multi-tenant configurations on different edge accelerators (Jetson AGX and Coral TPU) designed for sensory devices. We report on the overall throughput and quantified benefits of various automation components of SensiX++ and demonstrate its efficacy in significantly reducing operational complexity and lowering the effort to deploy, upgrade, reconfigure and serve embedded models on edge devices. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2109.03947v1-abstract-full').style.display = 'none'; document.getElementById('2109.03947v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 8 September, 2021; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> September 2021. 
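<p class="is-size-7">A hypothetical sketch of the document-centric orchestration style this abstract describes; every name and field below is invented for illustration and is not SensiX++'s actual manifest format or API.</p> <pre><code># Hypothetical manifest: sensors, models, and serving wired up declaratively.
MANIFEST = {
    "sensors": [{"id": "cam0", "type": "camera", "rate_hz": 15}],
    "models": [
        {"name": "person-det", "input": "cam0", "accelerator": "coral-tpu",
         "transform": ["resize:300x300", "normalize"]},
        {"name": "scene-cls", "input": "cam0", "accelerator": "jetson-gpu",
         "transform": ["resize:224x224", "normalize"]},
    ],
    "serving": {"api": "rest", "port": 8080},
}

def plan(manifest):
    # Group models by shared input so a data coordinator could acquire and
    # transform each frame once and fan it out to all tenant models.
    by_input = {}
    for m in manifest["models"]:
        by_input.setdefault(m["input"], []).append(m["name"])
    return by_input

print(plan(MANIFEST))   # {'cam0': ['person-det', 'scene-cls']}
</code></pre>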
</p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">13 pages, 15 figures</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2109.00709">arXiv:2109.00709</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2109.00709">pdf</a>, <a href="https://arxiv.org/ps/2109.00709">ps</a>, <a href="https://arxiv.org/format/2109.00709">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Information Theory">cs.IT</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Probability">math.PR</span> </div> </div> <p class="title is-5 mathjax"> An Information-Theoretic View of Stochastic Localization </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Alaoui%2C+A+E">Ahmed El Alaoui</a>, <a href="/search/cs?searchtype=author&amp;query=Montanari%2C+A">Andrea Montanari</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2109.00709v2-abstract-short" style="display: inline;"> Given a probability measure $渭$ over ${\mathbb R}^n$, it is often useful to approximate it by the convex combination of a small number of probability measures, such that each component is close to a product measure. Recently, Ronen Eldan used a stochastic localization argument to prove a general decomposition result of this type. In Eldan&#39;s theorem, the `number of components&#39; is characterized by t&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2109.00709v2-abstract-full').style.display = 'inline'; document.getElementById('2109.00709v2-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2109.00709v2-abstract-full" style="display: none;"> Given a probability measure $渭$ over ${\mathbb R}^n$, it is often useful to approximate it by the convex combination of a small number of probability measures, such that each component is close to a product measure. Recently, Ronen Eldan used a stochastic localization argument to prove a general decomposition result of this type. In Eldan&#39;s theorem, the `number of components&#39; is characterized by the entropy of the mixture, and `closeness to product&#39; is characterized by the covariance matrix of each component. We present an elementary proof of Eldan&#39;s theorem which makes use of an information theory (or estimation theory) interpretation. The proof is analogous to the one of an earlier decomposition result known as the `pinning lemma.&#39; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2109.00709v2-abstract-full').style.display = 'none'; document.getElementById('2109.00709v2-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 9 September, 2021; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 2 September, 2021; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> September 2021. 
</p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">8 pages; v2 corrects an annoying typo in the statement of the main theorem</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2106.04805">arXiv:2106.04805</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2106.04805">pdf</a>, <a href="https://arxiv.org/format/2106.04805">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">stat.ML</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Social and Information Networks">cs.SI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Probability">math.PR</span> </div> </div> <p class="title is-5 mathjax"> Streaming Belief Propagation for Community Detection </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Wu%2C+Y">Yuchen Wu</a>, <a href="/search/cs?searchtype=author&amp;query=Bateni%2C+M">MohammadHossein Bateni</a>, <a href="/search/cs?searchtype=author&amp;query=Linhares%2C+A">Andre Linhares</a>, <a href="/search/cs?searchtype=author&amp;query=de+Almeida%2C+F+M+G">Filipe Miguel Goncalves de Almeida</a>, <a href="/search/cs?searchtype=author&amp;query=Montanari%2C+A">Andrea Montanari</a>, <a href="/search/cs?searchtype=author&amp;query=Norouzi-Fard%2C+A">Ashkan Norouzi-Fard</a>, <a href="/search/cs?searchtype=author&amp;query=Tardos%2C+J">Jakab Tardos</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2106.04805v2-abstract-short" style="display: inline;"> The community detection problem requires to cluster the nodes of a network into a small number of well-connected &#34;communities&#34;. There has been substantial recent progress in characterizing the fundamental statistical limits of community detection under simple stochastic block models. However, in real-world applications, the network structure is typically dynamic, with nodes that join over time. In&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2106.04805v2-abstract-full').style.display = 'inline'; document.getElementById('2106.04805v2-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2106.04805v2-abstract-full" style="display: none;"> The community detection problem requires to cluster the nodes of a network into a small number of well-connected &#34;communities&#34;. There has been substantial recent progress in characterizing the fundamental statistical limits of community detection under simple stochastic block models. However, in real-world applications, the network structure is typically dynamic, with nodes that join over time. In this setting, we would like a detection algorithm to perform only a limited number of updates at each node arrival. While standard voting approaches satisfy this constraint, it is unclear whether they exploit the network information optimally. We introduce a simple model for networks growing over time which we refer to as streaming stochastic block model (StSBM). 
Within this model, we prove that voting algorithms have fundamental limitations. We also develop a streaming belief-propagation (StreamBP) approach, for which we prove optimality in certain regimes. We validate our theoretical findings on synthetic and real data. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2106.04805v2-abstract-full').style.display = 'none'; document.getElementById('2106.04805v2-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 10 June, 2021; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 9 June, 2021; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> June 2021. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">36 pages, 13 figures</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2103.15996">arXiv:2103.15996</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2103.15996">pdf</a>, <a href="https://arxiv.org/format/2103.15996">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Statistics Theory">math.ST</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">stat.ML</span> </div> </div> <p class="title is-5 mathjax"> Minimum complexity interpolation in random features models </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Celentano%2C+M">Michael Celentano</a>, <a href="/search/cs?searchtype=author&amp;query=Misiakiewicz%2C+T">Theodor Misiakiewicz</a>, <a href="/search/cs?searchtype=author&amp;query=Montanari%2C+A">Andrea Montanari</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2103.15996v2-abstract-short" style="display: inline;"> Despite their many appealing properties, kernel methods are heavily affected by the curse of dimensionality. For instance, in the case of inner product kernels in $\mathbb{R}^d$, the Reproducing Kernel Hilbert Space (RKHS) norm is often very large for functions that depend strongly on a small subset of directions (ridge functions). Correspondingly, such functions are difficult to learn using kerne&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2103.15996v2-abstract-full').style.display = 'inline'; document.getElementById('2103.15996v2-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2103.15996v2-abstract-full" style="display: none;"> Despite their many appealing properties, kernel methods are heavily affected by the curse of dimensionality. For instance, in the case of inner product kernels in $\mathbb{R}^d$, the Reproducing Kernel Hilbert Space (RKHS) norm is often very large for functions that depend strongly on a small subset of directions (ridge functions). Correspondingly, such functions are difficult to learn using kernel methods. 
This observation has motivated the study of generalizations of kernel methods, whereby the RKHS norm -- which is equivalent to a weighted $\ell_2$ norm -- is replaced by a weighted functional $\ell_p$ norm, which we refer to as $\mathcal{F}_p$ norm. Unfortunately, tractability of these approaches is unclear. The kernel trick is not available and minimizing these norms requires solving an infinite-dimensional convex problem. We study random features approximations to these norms and show that, for $p&gt;1$, the number of random features required to approximate the original learning problem is upper bounded by a polynomial in the sample size. Hence, learning with $\mathcal{F}_p$ norms is tractable in these cases. We introduce a proof technique based on uniform concentration in the dual, which can be of broader interest in the study of overparametrized models. For $p=1$, our guarantees for the random features approximation break down. We prove instead that learning with the $\mathcal{F}_1$ norm is $\mathsf{NP}$-hard under a randomized reduction based on the problem of learning halfspaces with noise. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2103.15996v2-abstract-full').style.display = 'none'; document.getElementById('2103.15996v2-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 5 November, 2021; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 29 March, 2021; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> March 2021. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">42 pages, 1 figure</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2103.09177">arXiv:2103.09177</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2103.09177">pdf</a>, <a href="https://arxiv.org/format/2103.09177">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Statistics Theory">math.ST</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">stat.ML</span> </div> </div> <p class="title is-5 mathjax"> Deep learning: a statistical viewpoint </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Bartlett%2C+P+L">Peter L. Bartlett</a>, <a href="/search/cs?searchtype=author&amp;query=Montanari%2C+A">Andrea Montanari</a>, <a href="/search/cs?searchtype=author&amp;query=Rakhlin%2C+A">Alexander Rakhlin</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2103.09177v1-abstract-short" style="display: inline;"> The remarkable practical success of deep learning has revealed some major surprises from a theoretical perspective. In particular, simple gradient methods easily find near-optimal solutions to non-convex optimization problems, and despite giving a near-perfect fit to training data without any explicit effort to control model complexity, these methods exhibit excellent predictive accuracy. 
We conje&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2103.09177v1-abstract-full').style.display = 'inline'; document.getElementById('2103.09177v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2103.09177v1-abstract-full" style="display: none;"> The remarkable practical success of deep learning has revealed some major surprises from a theoretical perspective. In particular, simple gradient methods easily find near-optimal solutions to non-convex optimization problems, and despite giving a near-perfect fit to training data without any explicit effort to control model complexity, these methods exhibit excellent predictive accuracy. We conjecture that specific principles underlie these phenomena: that overparametrization allows gradient methods to find interpolating solutions, that these methods implicitly impose regularization, and that overparametrization leads to benign overfitting. We survey recent theoretical progress that provides examples illustrating these principles in simpler settings. We first review classical uniform convergence results and why they fall short of explaining aspects of the behavior of deep learning methods. We give examples of implicit regularization in simple settings, where gradient methods lead to minimal norm functions that perfectly fit the training data. Then we review prediction methods that exhibit benign overfitting, focusing on regression problems with quadratic loss. For these methods, we can decompose the prediction rule into a simple component that is useful for prediction and a spiky component that is useful for overfitting but, in a favorable setting, does not harm prediction accuracy. We focus specifically on the linear regime for neural networks, where the network can be approximated by a linear model. In this regime, we demonstrate the success of gradient flow, and we consider benign overfitting with two-layer networks, giving an exact asymptotic analysis that precisely demonstrates the impact of overparametrization. We conclude by highlighting the key challenges that arise in extending these insights to realistic deep learning settings. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2103.09177v1-abstract-full').style.display = 'none'; document.getElementById('2103.09177v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 16 March, 2021; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> March 2021. 
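<p class="is-size-7">A minimal sketch (illustrative, not from the survey) of the simplest setting in which the benign-overfitting decomposition mentioned above can be seen: minimum-$\ell_2$-norm ("ridgeless") linear regression with many more parameters than samples interpolates noisy training data, yet can still predict reasonably on fresh data.</p> <pre><code>import numpy as np

rng = np.random.default_rng(0)
n, p = 50, 500                            # overparametrized: p much larger than n
theta = np.zeros(p)
theta[:5] = 1.0                           # simple low-dimensional signal
X = rng.standard_normal((n, p))
y = X @ theta + 0.5 * rng.standard_normal(n)

theta_hat = np.linalg.pinv(X) @ y         # minimum-norm interpolator of X theta = y
print("train MSE:", np.mean((X @ theta_hat - y) ** 2))   # ~0: perfect fit to noise

X_test = rng.standard_normal((2000, p))   # fresh test points, noiseless labels
y_test = X_test @ theta
print("test MSE:", np.mean((X_test @ theta_hat - y_test) ** 2))
</code></pre>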
arXiv:2102.13219  [pdf, other]  stat.ML cs.LG math.ST
Learning with invariances in random features and kernel models
Authors: Song Mei, Theodor Misiakiewicz, Andrea Montanari
Abstract: A number of machine learning tasks entail a high degree of invariance: the data distribution does not change if we act on the data with a certain group of transformations. For instance, labels of images are invariant under translations of the images. Certain neural network architectures -- for instance, convolutional networks -- are believed to owe their success to the fact that they exploit such invariance properties. With the objective of quantifying the gain achieved by invariant architectures, we introduce two classes of models: invariant random features and invariant kernel methods. The latter includes, as a special case, the neural tangent kernel for convolutional networks with global average pooling. We consider uniform covariate distributions on the sphere and the hypercube and a general invariant target function. We characterize the test error of invariant methods in a high-dimensional regime in which the sample size and number of hidden units scale as polynomials in the dimension, for a class of groups that we call `degeneracy $\alpha$', with $\alpha \leq 1$. We show that exploiting invariance in the architecture saves a $d^\alpha$ factor ($d$ stands for the dimension) in sample size and number of hidden units to achieve the same test error as for unstructured architectures. Finally, we show that output symmetrization of an unstructured kernel estimator does not give a significant statistical improvement; on the other hand, data augmentation with an unstructured kernel estimator is equivalent to an invariant kernel estimator and enjoys the same improvement in statistical efficiency.
Submitted 25 February, 2021; originally announced February 2021.
Comments: 63 pages, 6 figures
MSC Class: 62J99 (Primary)
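A toy sketch of the two constructions compared in this abstract, for the cyclic-shift group: an invariant kernel obtained by group-averaging a base kernel, and data augmentation by training on all shifted copies. The polynomial base kernel and all sizes are our arbitrary choices for illustration.

```python
import numpy as np

rng = np.random.default_rng(2)
n, d = 40, 16
X = rng.standard_normal((n, d))

def base_kernel(A, B):
    # Toy polynomial (dot-product) kernel; any base kernel would do.
    return (1.0 + A @ B.T / A.shape[1]) ** 3

def invariant_kernel(A, B):
    # Average the base kernel over the cyclic group of coordinate shifts.
    K = np.zeros((A.shape[0], B.shape[0]))
    for s in range(B.shape[1]):
        K += base_kernel(A, np.roll(B, s, axis=1))
    return K / B.shape[1]

# Data augmentation trains the *unstructured* kernel method on all shifted
# copies; the paper shows this matches the invariant kernel estimator.
X_aug = np.concatenate([np.roll(X, s, axis=1) for s in range(d)])

# Sanity check: the averaged kernel is invariant to shifting its argument.
x, x_shift = X[:1], np.roll(X[:1], 3, axis=1)
print(np.allclose(invariant_kernel(x, X), invariant_kernel(x_shift, X)))  # True
```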
arXiv:2012.06035  [pdf, other]  cs.DC cs.LG eess.SP
SensiX: A Platform for Collaborative Machine Learning on the Edge
Authors: Chulhong Min, Akhil Mathur, Alessandro Montanari, Utku Gunay Acer, Fahim Kawsar
Abstract: The emergence of multiple sensory devices on or near a human body is uncovering new dynamics of extreme edge computing. In this setting, a powerful and resource-rich edge device such as a smartphone or a Wi-Fi gateway is transformed into a personal edge, collaborating with multiple devices to offer remarkable sensory applications while harnessing the power of locality, availability, and proximity. Naturally, this transformation pushes us to rethink how to construct accurate, robust, and efficient sensory systems at the personal edge. For instance, how do we build a reliable activity tracker with multiple on-body IMU-equipped devices? While the accuracy of sensing models is improving, their runtime performance still suffers, especially in these emerging multi-device, personal edge environments. Two prime caveats that impact their performance are device and data variabilities, contributed by several runtime factors, including device availability, data quality, and device placement. To this end, we present SensiX, a personal edge platform that stays between sensor data and sensing models and ensures best-effort inference under any condition while coping with device and data variabilities without demanding model engineering. SensiX externalises model execution away from applications and comprises two essential functions: a translation operator for principled mapping of device-to-device data and a quality-aware selection operator to systematically choose the right execution path as a function of model accuracy. We report the design and implementation of SensiX and demonstrate its efficacy in developing motion- and audio-based multi-device sensing systems. Our evaluation shows that SensiX offers a 7-13% increase in overall accuracy and up to a 30% increase across different environment dynamics, at the expense of a 3 mW power overhead.
Submitted 4 December, 2020; originally announced December 2020.
Comments: 14 pages, 13 figures, 2 tables
MSC Class: 68M99
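A hypothetical sketch of what a quality-aware selection operator could look like; the function and field names below are invented for illustration and do not reflect SensiX's actual interfaces.

```python
# All names here are illustrative; SensiX's real API differs.
def select_execution_path(candidates, conditions):
    """Pick the device/model pair with the highest expected accuracy under the
    current runtime conditions (availability, placement, signal quality)."""
    available = [c for c in candidates
                 if conditions.get(c["device"], {}).get("available", False)]
    if not available:
        raise RuntimeError("no device available for best-effort inference")
    return max(available, key=lambda c: c["accuracy_fn"](conditions[c["device"]]))
```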
arXiv:2011.03395  [pdf, other]  cs.LG stat.ML
Underspecification Presents Challenges for Credibility in Modern Machine Learning
Authors: Alexander D'Amour, Katherine Heller, Dan Moldovan, Ben Adlam, Babak Alipanahi, Alex Beutel, Christina Chen, Jonathan Deaton, Jacob Eisenstein, Matthew D. Hoffman, Farhad Hormozdiari, Neil Houlsby, Shaobo Hou, Ghassen Jerfel, Alan Karthikesalingam, Mario Lucic, Yian Ma, Cory McLean, Diana Mincu, Akinori Mitani, Andrea Montanari, Zachary Nado, Vivek Natarajan, Christopher Nielson, Thomas F. Osborne, et al. (15 additional authors not shown)
Abstract: ML models often exhibit unexpectedly poor behavior when they are deployed in real-world domains. We identify underspecification as a key reason for these failures. An ML pipeline is underspecified when it can return many predictors with equivalently strong held-out performance in the training domain. Underspecification is common in modern ML pipelines, such as those based on deep learning. Predictors returned by underspecified pipelines are often treated as equivalent based on their training domain performance, but we show here that such predictors can behave very differently in deployment domains. This ambiguity can lead to instability and poor model behavior in practice, and is a distinct failure mode from previously identified issues arising from structural mismatch between training and deployment domains. We show that this problem appears in a wide variety of practical ML pipelines, using examples from computer vision, medical imaging, natural language processing, clinical risk prediction based on electronic health records, and medical genomics. Our results show the need to explicitly account for underspecification in modeling pipelines that are intended for real-world deployment in any domain.
Submitted 24 November, 2020; v1 submitted 6 November, 2020; originally announced November 2020.
Comments: Updates: Updated statistical analysis in Section 6; Additional citations
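A small numpy illustration of underspecification (ours, not from the paper): in an overparametrized linear model, gradient descent started from different random initializations returns different interpolators with identical training error, and these predictors can disagree on new inputs.

```python
import numpy as np

rng = np.random.default_rng(3)
n, p = 40, 200                 # overparametrized: the solution set {X theta = y} is large
X = rng.standard_normal((n, p))
y = rng.standard_normal(n)

def gd_limit(theta0):
    # Limit of gradient descent on ||X theta - y||^2 started from theta0:
    # theta0 plus the minimum-norm correction landing on the solution set.
    return theta0 + X.T @ np.linalg.solve(X @ X.T, y - X @ theta0)

thetas = [gd_limit(0.1 * rng.standard_normal(p)) for _ in range(5)]
print([round(float(np.linalg.norm(X @ t - y)), 8) for t in thetas])  # all ~0
x_new = rng.standard_normal(p)
print([round(float(t @ x_new), 3) for t in thetas])   # yet predictions differ
```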
arXiv:2008.05335  [pdf, other]  cs.FL cs.LO cs.SE
Reactive Synthesis from Extended Bounded Response LTL Specifications
Authors: Alessandro Cimatti, Luca Geatti, Nicola Gigante, Angelo Montanari, Stefano Tonetta
Abstract: Reactive synthesis is a key technique for the design of correct-by-construction systems and has been thoroughly investigated in the last decades. It consists in synthesizing a controller that reacts to the environment's inputs while satisfying a given temporal logic specification. Common approaches are based on the explicit construction of automata and on their determinization, which limits their scalability. In this paper, we introduce a new fragment of Linear Temporal Logic, called Extended Bounded Response LTL (LTL-EBR), that allows one to combine bounded and universal unbounded temporal operators (thus covering a large set of practical cases), and we show that reactive synthesis from LTL-EBR specifications can be reduced to solving a safety game over a deterministic symbolic automaton built directly from the specification. We prove the correctness of the proposed approach and successfully evaluate it on various benchmarks.
Submitted 12 August, 2020; originally announced August 2020.
Comments: Extended Version
ACM Class: D.2.4; F.4.1; F.4.3
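A safety game of the kind the paper reduces synthesis to can be solved by the standard greatest-fixpoint computation of the controller's winning region. A minimal illustrative solver over an explicit (non-symbolic) game graph, not the authors' implementation:

```python
def solve_safety_game(safe, moves):
    # Standard greatest-fixpoint computation of the winning region: repeatedly
    # discard states from which no controller action keeps every possible
    # environment response inside the current candidate set.
    win = set(safe)
    while True:
        nxt = {s for s in win
               if any(all(t in win for t in env_resp.values())
                      for env_resp in moves[s].values())}
        if nxt == win:
            return win
        win = nxt

# Toy game: from state 1 only action "b" avoids the unsafe state 2.
moves = {
    0: {"a": {"e0": 0, "e1": 1}},
    1: {"a": {"e0": 2}, "b": {"e0": 0}},
    2: {"a": {"e0": 2}},
}
print(solve_safety_game(safe={0, 1}, moves=moves))   # {0, 1}
```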
arXiv:2007.13716  [pdf, other]  math.ST cs.IT cs.LG stat.ME stat.ML
The Lasso with general Gaussian designs with applications to hypothesis testing
Authors: Michael Celentano, Andrea Montanari, Yuting Wei
Abstract: The Lasso is a method for high-dimensional regression, which is now commonly used when the number of covariates $p$ is of the same order or larger than the number of observations $n$. Classical asymptotic normality theory does not apply to this model due to two fundamental reasons: $(1)$ the regularized risk is non-smooth; $(2)$ the distance between the estimator $\widehat{\boldsymbol\theta}$ and the true parameter vector $\boldsymbol\theta^*$ cannot be neglected. As a consequence, standard perturbative arguments that are the traditional basis for asymptotic normality fail. On the other hand, the Lasso estimator can be precisely characterized in the regime in which both $n$ and $p$ are large and $n/p$ is of order one. This characterization was first obtained in the case of Gaussian designs with i.i.d. covariates: here we generalize it to Gaussian correlated designs with non-singular covariance structure. This is expressed in terms of a simpler ``fixed-design'' model. We establish non-asymptotic bounds on the distance between the distributions of various quantities in the two models, which hold uniformly over signals $\boldsymbol\theta^*$ in a suitable sparsity class and over values of the regularization parameter. As an application, we study the distribution of the debiased Lasso and show that a degrees-of-freedom correction is necessary for computing valid confidence intervals.
Submitted 19 September, 2023; v1 submitted 27 July, 2020; originally announced July 2020.
Comments: final version accepted to Annals of Statistics
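A sketch of the degrees-of-freedom correction mentioned in the abstract, in the simplest isotropic-design case: the correction term is divided by $n - \mathrm{df}$ rather than $n$. The sizes, regularization level, and use of scikit-learn are our illustrative choices; the paper's correlated-design version is more general.

```python
import numpy as np
from sklearn.linear_model import Lasso

rng = np.random.default_rng(4)
n, p, s = 300, 500, 10
X = rng.standard_normal((n, p))
theta_star = np.zeros(p); theta_star[:s] = 1.0
y = X @ theta_star + rng.standard_normal(n)

theta_hat = Lasso(alpha=0.1, fit_intercept=False).fit(X, y).coef_
df = np.count_nonzero(theta_hat)       # degrees of freedom = size of the support

# Debiased Lasso with the degrees-of-freedom correction: divide by n - df
# (isotropic-design sketch; see the paper for general covariance and the
# resulting confidence intervals).
theta_debiased = theta_hat + X.T @ (y - X @ theta_hat) / (n - df)
print("error on theta_1, naive vs corrected:",
      float(theta_hat[0] - 1.0), float(theta_debiased[0] - 1.0))
```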
arXiv:2007.12826  [pdf, other]  stat.ML cs.LG math.ST
The Interpolation Phase Transition in Neural Networks: Memorization and Generalization under Lazy Training
Authors: Andrea Montanari, Yiqiao Zhong
Abstract: Modern neural networks are often operated in a strongly overparametrized regime: they comprise so many parameters that they can interpolate the training set, even if actual labels are replaced by purely random ones. Despite this, they achieve good prediction error on unseen data: interpolating the training set does not lead to a large generalization error. Further, overparametrization appears to be beneficial in that it simplifies the optimization landscape. Here we study these phenomena in the context of two-layer neural networks in the neural tangent (NT) regime. We consider a simple data model, with isotropic covariate vectors in $d$ dimensions, and $N$ hidden neurons. We assume that both the sample size $n$ and the dimension $d$ are large, and they are polynomially related. Our first main result is a characterization of the eigenstructure of the empirical NT kernel in the overparametrized regime $Nd \gg n$. This characterization implies as a corollary that the minimum eigenvalue of the empirical NT kernel is bounded away from zero as soon as $Nd \gg n$, and therefore the network can exactly interpolate arbitrary labels in the same regime. Our second main result is a characterization of the generalization error of NT ridge regression including, as a special case, min-$\ell_2$ norm interpolation. We prove that, as soon as $Nd \gg n$, the test error is well approximated by the one of kernel ridge regression with respect to the infinite-width kernel. The latter is in turn well approximated by the error of polynomial ridge regression, whereby the regularization parameter is increased by a `self-induced' term related to the high-degree components of the activation function. The polynomial degree depends on the sample size and the dimension (in particular on $\log n / \log d$).
Submitted 8 June, 2022; v1 submitted 24 July, 2020; originally announced July 2020.
Comments: 83 pages, 5 figures
MSC Class: 62J07; 62H12
ACM Class: I.2.6
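An illustrative computation of the empirical NT kernel of a two-layer ReLU network at initialization, with respect to the first-layer weights (a common convention; the paper's precise setup may differ), checking that its minimum eigenvalue stays away from zero when $Nd \gg n$:

```python
import numpy as np

rng = np.random.default_rng(5)
n, d, N = 100, 20, 400              # Nd >> n, the regime studied in the paper

X = rng.standard_normal((n, d)) / np.sqrt(d)     # roughly unit-norm covariates
W = rng.standard_normal((N, d))                  # first-layer weights at init

# Empirical NT kernel with respect to first-layer weights of a ReLU network:
# K[i, j] = <x_i, x_j> * (1/N) * sum_a 1{<w_a, x_i> > 0} 1{<w_a, x_j> > 0}.
act = (X @ W.T > 0).astype(float)                # (n, N) activation pattern
K = (X @ X.T) * (act @ act.T) / N

print("min eigenvalue of empirical NT kernel:", float(np.linalg.eigvalsh(K).min()))
```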
arXiv:2006.13409  [pdf, other]  stat.ML cs.LG math.ST
DOI: 10.1088/1742-5468/ac3a81
When Do Neural Networks Outperform Kernel Methods?
Authors: Behrooz Ghorbani, Song Mei, Theodor Misiakiewicz, Andrea Montanari
Abstract: For a certain scaling of the initialization of stochastic gradient descent (SGD), wide neural networks (NN) have been shown to be well approximated by reproducing kernel Hilbert space (RKHS) methods. Recent empirical work showed that, for some classification tasks, RKHS methods can replace NNs without a large loss in performance. On the other hand, two-layer NNs are known to encode richer smoothness classes than RKHS, and we know of special examples for which SGD-trained NNs provably outperform RKHS. This is true even in the wide network limit, for a different scaling of the initialization. How can we reconcile the above claims? For which tasks do NNs outperform RKHS? If covariates are nearly isotropic, RKHS methods suffer from the curse of dimensionality, while NNs can overcome it by learning the best low-dimensional representation. Here we show that this curse of dimensionality becomes milder if the covariates display the same low-dimensional structure as the target function, and we precisely characterize this tradeoff. Building on these results, we present the spiked covariates model, which can capture in a unified framework both behaviors observed in earlier work. We hypothesize that such a latent low-dimensional structure is present in image classification. We test this hypothesis numerically by showing that specific perturbations of the training distribution degrade the performance of RKHS methods much more significantly than that of NNs.
Submitted 9 November, 2021; v1 submitted 23 June, 2020; originally announced June 2020.
Comments: 100 pages, 12 figures
MSC Class: 62J99 (Primary)
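A toy parametrization of covariates with a latent low-dimensional spike, in the spirit of the spiked covariates model (the paper's exact model and scalings differ): the few directions the target depends on carry higher variance, which is the structure that makes the curse of dimensionality milder for kernel methods.

```python
import numpy as np

rng = np.random.default_rng(6)
n, d, d0, snr = 500, 100, 5, 10.0

# Higher variance along the d0 informative directions (illustrative choice).
X = rng.standard_normal((n, d))
X[:, :d0] *= np.sqrt(snr)
y = np.sign(X[:, :d0].sum(axis=1))   # label depends only on the spiked block
```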
arXiv:2006.04652  [pdf, other]  cs.LO cs.FL
DOI: 10.46298/lmcs-18(1:24)2022
Satisfiability and Model Checking for the Logic of Sub-Intervals under the Homogeneity Assumption
Authors: Laura Bozzelli, Alberto Molinari, Angelo Montanari, Adriano Peron, Pietro Sala
Abstract: The expressive power of interval temporal logics (ITLs) makes them one of the most natural choices in a number of application domains, ranging from the specification and verification of complex reactive systems to automated planning. However, for a long time, because of their high computational complexity, they were considered not suitable for practical purposes. The recent discovery of several computationally well-behaved ITLs has finally changed the scenario. In this paper, we investigate the finite satisfiability and model checking problems for the ITL D, which has a single modality for the sub-interval relation, under the homogeneity assumption (which constrains a proposition letter to hold over an interval if and only if it holds over all its points). We first prove that the satisfiability problem for D, over finite linear orders, is PSPACE-complete, and then we show that the same holds for its model checking problem, over finite Kripke structures. In this way, we enrich the set of tractable interval temporal logics with a new meaningful representative.
Submitted 31 January, 2022; v1 submitted 8 June, 2020; originally announced June 2020.
Comments: arXiv admin note: text overlap with arXiv:1901.03880
Journal ref: Logical Methods in Computer Science, Volume 18, Issue 1 (February 1, 2022) lmcs:6542
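A minimal evaluator illustrating the homogeneity assumption and the sub-interval modality of D over a finite trace. The choice of proper sub-interval relation below is one common convention; the paper fixes the formal semantics.

```python
# trace: one set of proposition letters per point of a finite linear order.
def holds_atom(trace, i, j, p):
    # Homogeneity: p holds over [i, j] iff p holds at every point of [i, j].
    return all(p in trace[k] for k in range(i, j + 1))

def holds_D(trace, i, j, p):
    # <D> p: p holds over some proper sub-interval of [i, j]
    # (here: any [a, b] inside [i, j] other than [i, j] itself).
    return any(holds_atom(trace, a, b, p)
               for a in range(i, j + 1)
               for b in range(a, j + 1)
               if (a, b) != (i, j))

trace = [{"p"}, {"p"}, {"p", "q"}, {"p"}]
print(holds_atom(trace, 0, 3, "p"))   # True: p is homogeneous over [0, 3]
print(holds_D(trace, 0, 3, "q"))      # True: q holds over the sub-interval [2, 2]
```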
arXiv:2002.12903  [pdf, ps, other]  stat.ML cs.LG math.ST
The estimation error of general first order methods
Authors: Michael Celentano, Andrea Montanari, Yuchen Wu
Abstract: Modern large-scale statistical models require estimating thousands to millions of parameters. This is often accomplished by iterative algorithms such as gradient descent, projected gradient descent, or their accelerated versions. What are the fundamental limits of these approaches? This question is well understood from an optimization viewpoint when the underlying objective is convex. Work in this area characterizes the gap to global optimality as a function of the number of iterations. However, these results have only indirect implications in terms of the gap to statistical optimality. Here we consider two families of high-dimensional estimation problems: high-dimensional regression and low-rank matrix estimation, and introduce a class of `general first order methods' that aim at efficiently estimating the underlying parameters. This class of algorithms is broad enough to include classical first order optimization (for convex and non-convex objectives), but also other types of algorithms. Under a random design assumption, we derive lower bounds on the estimation error that hold in the high-dimensional asymptotics in which both the number of observations and the number of parameters diverge. These lower bounds are optimal in the sense that there exist algorithms whose estimation error matches the lower bounds up to asymptotically negligible terms. We illustrate our general results through applications to sparse phase retrieval and sparse principal component analysis.
Submitted 3 March, 2020; v1 submitted 28 February, 2020; originally announced February 2020.
Comments: 49 pages
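Iterative hard thresholding, a projected gradient method for sparse regression, is one concrete member of the class of first order methods whose estimation error such lower bounds constrain. A small illustrative run (sizes, step size, and iteration count are our choices):

```python
import numpy as np

rng = np.random.default_rng(7)
n, p, s = 200, 400, 10
X = rng.standard_normal((n, p)) / np.sqrt(n)
theta_star = np.zeros(p); theta_star[:s] = 1.0
y = X @ theta_star + 0.1 * rng.standard_normal(n)

def hard_threshold(v, k):
    # Keep the k largest entries in magnitude, zero out the rest.
    out = np.zeros_like(v)
    keep = np.argsort(np.abs(v))[-k:]
    out[keep] = v[keep]
    return out

theta = np.zeros(p)
for _ in range(200):                   # projected gradient descent (IHT)
    theta = hard_threshold(theta - 0.5 * X.T @ (X @ theta - y), s)
print("estimation error:", float(np.linalg.norm(theta - theta_star)))
```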
arXiv:1912.00905  [pdf, ps, other]  stat.ML cs.LG stat.ME
Matrix sketching for supervised classification with imbalanced classes
Authors: Roberta Falcone, Angela Montanari, Laura Anderlucci
Abstract: Matrix sketching is a recently developed data compression technique. An input matrix A is efficiently approximated with a smaller matrix B, so that B preserves most of the properties of A up to some guaranteed approximation ratio. In this way, numerical operations on big data sets become faster. Sketching algorithms generally use random projections to compress the original dataset, and this stochastic generation process makes them amenable to statistical analysis. The statistical properties of sketching algorithms have been widely studied in the context of multiple linear regression. In this paper we propose matrix sketching as a tool for rebalancing class sizes in supervised classification with imbalanced classes. It is in fact well known that class imbalance may lead to poor classification performance, especially as far as the minority class is concerned.
Submitted 2 December, 2019; originally announced December 2019.
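A minimal sketch of the general idea (not the authors' specific procedure): a Gaussian sketch compresses the majority-class data matrix down to the minority-class size while preserving its Gram matrix in expectation, so the rebalanced problem retains most second-order structure.

```python
import numpy as np

rng = np.random.default_rng(8)
n_maj, n_min, p = 1000, 50, 20
A = rng.standard_normal((n_maj, p))            # majority-class data matrix

# Gaussian sketch S with E[S.T @ S] = I, so B = S @ A satisfies
# E[B.T @ B] = A.T @ A: inner products between features are preserved.
m = n_min
S = rng.standard_normal((m, n_maj)) / np.sqrt(m)
B = S @ A
print(B.shape)                                  # (50, 20): classes now balanced
```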
arXiv:1906.08899  [pdf, other]  stat.ML cs.LG math.ST
Limitations of Lazy Training of Two-layers Neural Networks
Authors: Behrooz Ghorbani, Song Mei, Theodor Misiakiewicz, Andrea Montanari
Abstract: We study the supervised learning problem under either of the following two models: (1) feature vectors ${\boldsymbol x}_i$ are $d$-dimensional Gaussians and responses are $y_i = f_*({\boldsymbol x}_i)$ for $f_*$ an unknown quadratic function; (2) feature vectors ${\boldsymbol x}_i$ are distributed as a mixture of two $d$-dimensional centered Gaussians, and the $y_i$'s are the corresponding class labels. We use two-layer neural networks with quadratic activations and compare three different learning regimes: the random features (RF) regime, in which we only train the second-layer weights; the neural tangent (NT) regime, in which we train a linearization of the neural network around its initialization; and the fully trained neural network (NN) regime, in which we train all the weights in the network. We prove that, even for the simple quadratic model of point (1), there is a potentially unbounded gap between the prediction risk achieved in these three training regimes when the number of neurons is smaller than the ambient dimension. When the number of neurons is larger than the number of dimensions, the problem is significantly easier and both NT and NN learning achieve zero risk.
Submitted 20 June, 2019; originally announced June 2019.
Comments: 39 pages; 2 pdf figures
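An illustrative comparison of the RF and NT regimes for quadratic activations with $N < d$ (the NN regime requires actual training and is omitted; all sizes are our choices, and the paper's gap concerns prediction risk rather than the training fit shown here): the RF model has only $N$ features, while the NT linearization has $Nd$ and can interpolate.

```python
import numpy as np

rng = np.random.default_rng(9)
n, d, N = 300, 50, 25                 # fewer neurons than dimensions

X = rng.standard_normal((n, d))
G = rng.standard_normal((d, d)); G = (G + G.T) / 2
y = np.einsum("ni,ij,nj->n", X, G, X) / d      # unknown quadratic target

W = rng.standard_normal((N, d)) / np.sqrt(d)   # first-layer weights at init

# RF regime: freeze W, fit only the N second-layer weights.
Phi_rf = (X @ W.T) ** 2
a_rf, *_ = np.linalg.lstsq(Phi_rf, y, rcond=None)

# NT regime: fit the linearization in W; for sigma(t) = t^2 the tangent
# features of neuron a at input x are 2 (w_a . x) x, giving N*d features.
Phi_nt = (2 * (X @ W.T))[:, :, None] * X[:, None, :]
Phi_nt = Phi_nt.reshape(n, N * d)
c_nt, *_ = np.linalg.lstsq(Phi_nt, y, rcond=None)

print("RF train MSE:", float(np.mean((Phi_rf @ a_rf - y) ** 2)))   # large
print("NT train MSE:", float(np.mean((Phi_nt @ c_nt - y) ** 2)))   # ~0
```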