Statistics Theory

<!DOCTYPE html> <html lang="en"> <head> <meta charset="utf-8"> <title>Statistics Theory </title> <meta name="viewport" content="width=device-width, initial-scale=1"> <link rel="apple-touch-icon" sizes="180x180" href="/static/browse/0.3.4/images/icons/apple-touch-icon.png"> <link rel="icon" type="image/png" sizes="32x32" href="/static/browse/0.3.4/images/icons/favicon-32x32.png"> <link rel="icon" type="image/png" sizes="16x16" href="/static/browse/0.3.4/images/icons/favicon-16x16.png"> <link rel="manifest" href="/static/browse/0.3.4/images/icons/site.webmanifest"> <link rel="mask-icon" href="/static/browse/0.3.4/images/icons/safari-pinned-tab.svg" color="#5bbad5"> <meta name="msapplication-TileColor" content="#da532c"> <meta name="theme-color" content="#ffffff"> <link rel="stylesheet" type="text/css" media="screen" href="/static/browse/0.3.4/css/arXiv.css?v=20241206" /> <link rel="stylesheet" type="text/css" media="print" href="/static/browse/0.3.4/css/arXiv-print.css?v=20200611" /> <link rel="stylesheet" type="text/css" media="screen" href="/static/browse/0.3.4/css/browse_search.css" /> <script src="/static/browse/0.3.4/js/accordion.js"></script> <link rel="stylesheet" type="text/css" media="screen" href="/static/browse/0.3.4/css/slider.css?v=20250312" /> <script src="https://code.jquery.com/jquery-latest.min.js" type="text/javascript"></script> <script type="text/javascript" src="/static/browse/0.3.4/js/donate.js?v=040725"></script><script src="/static/browse/0.3.4/js/mathjaxToggle.min.js" type="text/javascript"></script> <script type="text/javascript">mathjaxToggle();</script> </head> <body class="with-cu-identity"> <aside class="slider-wrapper bps-banner forum green"> <a class="close-slider do-close-slider bps-banner" href="#"><img src="/static/browse/0.3.4/images/icons/close-slider.png" alt="close this message"></a> <div class="columns"> <img role="presentation" class="bps-banner-image" 
src="/static/browse/0.3.4/images/icons/smileybones-pixel.png" alt="arXiv smileybones"> <div class="copy-donation bps-banner"> <h2>arXiv Is Hiring Software Developers</h2> <p>Work on one of the world's most important websites and make an impact on open science.</p> </div> <div class="amount-donation bps-banner"> <div class="donate-cta"><a class="banner_link banner-btn-grad" target="_blank" href="https://info.arxiv.org/hiring/index.html"><b>View Jobs</b></a></div> </div> </div> </aside> <div class="flex-wrap-footer"> <header> <a href="#content" class="is-sr-only">Skip to main content</a>  <div class="columns is-vcentered is-hidden-mobile" id="cu-identity"> <div class="column" id="cu-logo"> <a href="https://www.cornell.edu/"><img src="/static/browse/0.3.4/images/icons/cu/cornell-reduced-white-SMALL.svg" alt="Cornell University" /></a> </div> <div class="column banner-minimal forum"> <p>arXiv Is Hiring Software Devs</p> <a href="https://info.arxiv.org/hiring/index.html" target="_blank">View Jobs</a> </div><div class="column" id="support-ack"> <span id="support-ack-url">We gratefully acknowledge support from the Simons Foundation, <a href="https://info.arxiv.org/about/ourmembers.html">member institutions</a>, and all contributors.</span> <a href="https://info.arxiv.org/about/donate.html" class="btn-header-donate">Donate</a> </div> </div> <div id="header" class="is-hidden-mobile"> <a aria-hidden="true" tabindex="-1" href="/IgnoreMe"></a> <div class="header-breadcrumbs"> <a href="/"><img src="/static/browse/0.3.4/images/arxiv-logo-one-color-white.svg" alt="arxiv logo" style="height:40px;"/></a> <span>></span> <a href="/list/math.ST/recent">math.ST</a> </div> <div class="search-block level-right"> <form class="level-item mini-search" method="GET" action="https://arxiv.org/search"> <div class="field has-addons"> <div class="control"> <input class="input is-small" type="text" name="query" placeholder="Search..." 
aria-label="Search term or terms" /> <p class="help"><a href="https://info.arxiv.org/help">Help</a> | <a href="https://arxiv.org/search/advanced">Advanced Search</a></p> </div> <div class="control"> <div class="select is-small"> <select name="searchtype" aria-label="Field to search"> <option value="all" selected="selected">All fields</option> <option value="title">Title</option> <option value="author">Author</option> <option value="abstract">Abstract</option> <option value="comments">Comments</option> <option value="journal_ref">Journal reference</option> <option value="acm_class">ACM classification</option> <option value="msc_class">MSC classification</option> <option value="report_num">Report number</option> <option value="paper_id">arXiv identifier</option> <option value="doi">DOI</option> <option value="orcid">ORCID</option> <option value="author_id">arXiv author ID</option> <option value="help">Help pages</option> <option value="full_text">Full text</option> </select> </div> </div> <input type="hidden" name="source" value="header"> <button class="button is-small is-cul-darker">Search</button> </div> </form> </div> </div> <div class="mobile-header"> <div class="columns is-mobile"> <div class="column logo-arxiv"><a href="https://arxiv.org/"><img src="/static/browse/0.3.4/images/arxiv-logomark-small-white.svg" alt="arXiv logo" style="height:60px;" /></a></div> <div class="column logo-cornell"><a href="https://www.cornell.edu/"> <picture> <source media="(min-width: 501px)" srcset="/static/browse/0.3.4/images/icons/cu/cornell-reduced-white-SMALL.svg 400w" sizes="400w" /> <source srcset="/static/browse/0.3.4/images/icons/cu/cornell_seal_simple_black.svg 2x" /> <img src="/static/browse/0.3.4/images/icons/cu/cornell-reduced-white-SMALL.svg" alt="Cornell University Logo" /> </picture> </a></div> <div class="column nav" id="toggle-container" role="menubar"> <button class="toggle-control"><svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 512 512" class="icon 
filter-white"><title>open search</title><path d="M505 442.7L405.3 343c-4.5-4.5-10.6-7-17-7H372c27.6-35.3 44-79.7 44-128C416 93.1 322.9 0 208 0S0 93.1 0 208s93.1 208 208 208c48.3 0 92.7-16.4 128-44v16.3c0 6.4 2.5 12.5 7 17l99.7 99.7c9.4 9.4 24.6 9.4 33.9 0l28.3-28.3c9.4-9.4 9.4-24.6.1-34zM208 336c-70.7 0-128-57.2-128-128 0-70.7 57.2-128 128-128 70.7 0 128 57.2 128 128 0 70.7-57.2 128-128 128z"/></svg></button> <div class="mobile-toggle-block toggle-target"> <form class="mobile-search-form" method="GET" action="https://arxiv.org/search"> <div class="field has-addons"> <input class="input" type="text" name="query" placeholder="Search..." aria-label="Search term or terms" /> <input type="hidden" name="source" value="header"> <input type="hidden" name="searchtype" value="all"> <button class="button">GO</button> </div> </form> </div> <button class="toggle-control"><svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 448 512" class="icon filter-white" role="menu"><title>open navigation menu</title><path d="M16 132h416c8.837 0 16-7.163 16-16V76c0-8.837-7.163-16-16-16H16C7.163 60 0 67.163 0 76v40c0 8.837 7.163 16 16 16zm0 160h416c8.837 0 16-7.163 16-16v-40c0-8.837-7.163-16-16-16H16c-8.837 0-16 7.163-16 16v40c0 8.837 7.163 16 16 16zm0 160h416c8.837 0 16-7.163 16-16v-40c0-8.837-7.163-16-16-16H16c-8.837 0-16 7.163-16 16v40c0 8.837 7.163 16 16 16z"/></svg></button> <div class="mobile-toggle-block toggle-target"> <nav class="mobile-menu" aria-labelledby="mobilemenulabel"> <h2 id="mobilemenulabel">quick links</h2> <ul> <li><a href="https://arxiv.org/login">Login</a></li> <li><a href="https://info.arxiv.org/help">Help Pages</a></li> <li><a href="https://info.arxiv.org/about">About</a></li> </ul> </nav> </div> </div> </div> </div> </header> <main> <div id="content"> <div id='content-inner'> <div id='dlpage'> <h1>Statistics Theory</h1> <ul> <li><a href="#item1">New submissions</a></li> <li><a href="#item4">Cross-lists</a></li> <li><a href="#item7">Replacements</a></li> </ul> <p>See 
<a id="recent-math.ST" aria-labelledby="recent-math.ST" href="/list/math.ST/recent">recent</a> articles</p> <h3>Showing new listings for Friday, 11 April 2025</h3> <div class='paging'>Total of 11 entries </div> <div class='morefewer'>Showing up to 2000 entries per page: <a href="/list/math.ST/new?skip=0&amp;show=1000" rel="nofollow"> fewer</a> | <span style="color: #454545">more</span> | <span style="color: #454545">all</span> </div> <dl id='articles'> <h3>New submissions (showing 3 of 3 entries)</h3> <dt> <a name='item1'>[1]</a> <a href ="/abs/2504.07351" title="Abstract" id="2504.07351"> arXiv:2504.07351 </a> [<a href="/pdf/2504.07351" title="Download PDF" id="pdf-2504.07351" aria-labelledby="pdf-2504.07351">pdf</a>, <a href="https://arxiv.org/html/2504.07351v1" title="View HTML" id="html-2504.07351" aria-labelledby="html-2504.07351" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2504.07351" title="Other formats" id="oth-2504.07351" aria-labelledby="oth-2504.07351">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> A GARMA Framework for Unit-Bounded Time Series Based on the Unit-Lindley Distribution with Application to Renewable Energy Data </div> <div class='list-authors'><a href="https://arxiv.org/search/math?searchtype=author&query=Pumi,+G">Guilherme Pumi</a>, <a href="https://arxiv.org/search/math?searchtype=author&query=Matsuoka,+D+H">Danilo Hiroshi Matsuoka</a>, <a href="https://arxiv.org/search/math?searchtype=author&query=Prass,+T+S">Taiane Schaedler Prass</a></div> <div class='list-comments mathjax'><span class='descriptor'>Comments:</span> arXiv admin note: text overlap with <a href="https://arxiv.org/abs/2502.18645" data-arxiv-id="2502.18645" class="link-https">arXiv:2502.18645</a> </div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Statistics Theory (math.ST)</span>; Applications (stat.AP) </div> <p class='mathjax'> The 
Unit-Lindley is a one-parameter family of distributions in $(0,1)$ obtained from an appropriate transformation of the Lindley distribution. In this work, we introduce a class of dynamical time series models for continuous random variables taking values in $(0,1)$ based on the Unit-Lindley distribution. The models pertaining to the proposed class are observation-driven ones for which, conditionally on a set of covariates, the random component is modeled by a Unit-Lindley distribution. The systematic component aims at modeling the conditional mean through a dynamical structure resembling the classical ARMA models. Parameter estimation is conducted using partial maximum likelihood, for which an asymptotic theory is available. Based on asymptotic results, the construction of confidence intervals, hypotheses testing, model selection, and forecasting can be carried out. A Monte Carlo simulation study is conducted to assess the finite sample performance of the proposed partial maximum likelihood approach. Finally, an application considering forecasting of the proportion of net electricity generated by conventional hydroelectric power in the United States is presented. The application shows the versatility of the proposed method compared to other benchmark models in the literature. 
</p> </div> </dd> <dt> <a name='item2'>[2]</a> <a href ="/abs/2504.07704" title="Abstract" id="2504.07704"> arXiv:2504.07704 </a> [<a href="/pdf/2504.07704" title="Download PDF" id="pdf-2504.07704" aria-labelledby="pdf-2504.07704">pdf</a>, <a href="https://arxiv.org/html/2504.07704v1" title="View HTML" id="html-2504.07704" aria-labelledby="html-2504.07704" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2504.07704" title="Other formats" id="oth-2504.07704" aria-labelledby="oth-2504.07704">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Measures of non-simplifyingness for conditional copulas and vines </div> <div class='list-authors'><a href="https://arxiv.org/search/math?searchtype=author&query=Derumigny,+A">Alexis Derumigny</a></div> <div class='list-comments mathjax'><span class='descriptor'>Comments:</span> 16 pages </div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Statistics Theory (math.ST)</span>; Other Statistics (stat.OT) </div> <p class='mathjax'> In copula modeling, the simplifying assumption has recently been the object of much interest. Although it is very useful to reduce the computational burden, it remains far from obvious whether it is actually satisfied in practice. We propose a theoretical framework which aims at giving a precise meaning to the following question: how non-simplified or close to be simplified is a given conditional copula? For this, we propose a theoretical framework centered at the notion of measure of non-constantness. Then we discuss generalizations of the simplifying assumption to the case where the conditional marginal distributions may not be continuous, and corresponding measures of non-simplifyingness in this case. 
The simplifying assumption is of particular importance for vine copula models, and we therefore propose a notion of measure of non-simplifyingness of a given copula for a particular vine structure, as well as different scores measuring how non-simplified such a vine decomposition would be for a general vine. Finally, we propose estimators for these measures of non-simplifyingness given an observed dataset. </p> </div> </dd> <dt> <a name='item3'>[3]</a> <a href ="/abs/2504.07921" title="Abstract" id="2504.07921"> arXiv:2504.07921 </a> [<a href="/pdf/2504.07921" title="Download PDF" id="pdf-2504.07921" aria-labelledby="pdf-2504.07921">pdf</a>, <a href="/format/2504.07921" title="Other formats" id="oth-2504.07921" aria-labelledby="oth-2504.07921">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Note on the identification of total effect in Cluster-DAGs with cycles </div> <div class='list-authors'><a href="https://arxiv.org/search/math?searchtype=author&query=Yvernes,+C">Clément Yvernes</a></div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Statistics Theory (math.ST)</span>; Artificial Intelligence (cs.AI) </div> <p class='mathjax'> In this note, we discuss the identifiability of a total effect in cluster-DAGs, allowing for cycles within the cluster-DAG (while still assuming the associated underlying DAG to be acyclic). This is presented in two key results: first, restricting the cluster-DAG to clusters containing at most four nodes; second, adapting the notion of d-separation. We provide a graphical criterion to address the identifiability problem. 
</p> </div> </dd> </dl> <dl id='articles'> <h3>Cross submissions (showing 3 of 3 entries)</h3> <dt> <a name='item4'>[4]</a> <a href ="/abs/2504.07133" title="Abstract" id="2504.07133"> arXiv:2504.07133 </a> (cross-list from stat.ML) [<a href="/pdf/2504.07133" title="Download PDF" id="pdf-2504.07133" aria-labelledby="pdf-2504.07133">pdf</a>, <a href="/format/2504.07133" title="Other formats" id="oth-2504.07133" aria-labelledby="oth-2504.07133">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Can SGD Select Good Fishermen? Local Convergence under Self-Selection Biases and Beyond </div> <div class='list-authors'><a href="https://arxiv.org/search/stat?searchtype=author&query=Kalavasis,+A">Alkis Kalavasis</a>, <a href="https://arxiv.org/search/stat?searchtype=author&query=Mehrotra,+A">Anay Mehrotra</a>, <a href="https://arxiv.org/search/stat?searchtype=author&query=Zhou,+F">Felix Zhou</a></div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Machine Learning (stat.ML)</span>; Data Structures and Algorithms (cs.DS); Machine Learning (cs.LG); Statistics Theory (math.ST) </div> <p class='mathjax'> We revisit the problem of estimating $k$ linear regressors with self-selection bias in $d$ dimensions with the maximum selection criterion, as introduced by Cherapanamjeri, Daskalakis, Ilyas, and Zampetakis [CDIZ23, STOC'23]. Our main result is a $\operatorname{poly}(d,k,1/\varepsilon) + {k}^{O(k)}$ time algorithm for this problem, which yields an improvement in the running time of the algorithms of [CDIZ23] and [GM24, arXiv]. We achieve this by providing the first local convergence algorithm for self-selection, thus resolving the main open question of [CDIZ23]. <br>To obtain this algorithm, we reduce self-selection to a seemingly unrelated statistical problem called coarsening. 
Coarsening occurs when one does not observe the exact value of the sample but only some set (a subset of the sample space) that contains the exact value. Inference from coarse samples arises in various real-world applications due to rounding by humans and algorithms, limited precision of instruments, and lag in multi-agent systems. <br>Our reduction to coarsening is intuitive and relies on the geometry of the self-selection problem, which enables us to bypass the limitations of previous analytic approaches. To demonstrate its applicability, we provide a local convergence algorithm for linear regression under another self-selection criterion, which is related to second-price auction data. Further, we give the first polynomial time local convergence algorithm for coarse Gaussian mean estimation given samples generated from a convex partition. Previously, only a sample-efficient algorithm was known due to Fotakis, Kalavasis, Kontonis, and Tzamos [FKKT21, COLT'21]. </p> </div> </dd> <dt> <a name='item5'>[5]</a> <a href ="/abs/2504.07384" title="Abstract" id="2504.07384"> arXiv:2504.07384 </a> (cross-list from q-bio.PE) [<a href="/pdf/2504.07384" title="Download PDF" id="pdf-2504.07384" aria-labelledby="pdf-2504.07384">pdf</a>, <a href="/format/2504.07384" title="Other formats" id="oth-2504.07384" aria-labelledby="oth-2504.07384">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Convergence-divergence models: Generalizations of phylogenetic trees modeling gene flow over time </div> <div class='list-authors'><a href="https://arxiv.org/search/q-bio?searchtype=author&query=Mitchell,+J+D">Jonathan D. Mitchell</a>, <a href="https://arxiv.org/search/q-bio?searchtype=author&query=Holland,+B+R">Barbara R. 
Holland</a></div> <div class='list-comments mathjax'><span class='descriptor'>Comments:</span> 73 pages, 9 figures </div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Populations and Evolution (q-bio.PE)</span>; Statistics Theory (math.ST); Quantitative Methods (q-bio.QM) </div> <p class='mathjax'> Phylogenetic trees are simple models of evolutionary processes. They describe conditionally independent divergent evolution of taxa from common ancestors. Phylogenetic trees commonly do not have enough flexibility to adequately model all evolutionary processes. For example, introgressive hybridization, where genes can flow from one taxon to another. Phylogenetic networks model evolution not fully described by a phylogenetic tree. However, many phylogenetic network models assume ancestral taxa merge instantaneously to form ``hybrid'' descendant taxa. In contrast, our convergence-divergence models retain a single underlying ``principal'' tree, but permit gene flow over arbitrary time frames. Alternatively, convergence-divergence models can describe other biological processes leading to taxa becoming more similar over a time frame, such as replicated evolution. Here we present novel maximum likelihood-based algorithms to infer most aspects of $N$-taxon convergence-divergence models, many consistently, using a quartet-based approach. The algorithms can be applied to multiple sequence alignments restricted to genes or genomic windows or to gene presence/absence datasets. 
</p> </div> </dd> <dt> <a name='item6'>[6]</a> <a href ="/abs/2504.07522" title="Abstract" id="2504.07522"> arXiv:2504.07522 </a> (cross-list from cs.LG) [<a href="/pdf/2504.07522" title="Download PDF" id="pdf-2504.07522" aria-labelledby="pdf-2504.07522">pdf</a>, <a href="https://arxiv.org/html/2504.07522v1" title="View HTML" id="html-2504.07522" aria-labelledby="html-2504.07522" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2504.07522" title="Other formats" id="oth-2504.07522" aria-labelledby="oth-2504.07522">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Adversarial Subspace Generation for Outlier Detection in High-Dimensional Data </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Cribeiro-Ramallo,+J">Jose Cribeiro-Ramallo</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Matteucci,+F">Federico Matteucci</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Enciu,+P">Paul Enciu</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Jenke,+A">Alexander Jenke</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Arzamasov,+V">Vadim Arzamasov</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Strufe,+T">Thorsten Strufe</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=B%C3%B6hm,+K">Klemens Böhm</a></div> <div class='list-comments mathjax'><span class='descriptor'>Comments:</span> 35 pages, pre-print </div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Machine Learning (cs.LG)</span>; Artificial Intelligence (cs.AI); Statistics Theory (math.ST) </div> <p class='mathjax'> Outlier detection in high-dimensional tabular data is challenging since data is often distributed across multiple lower-dimensional subspaces -- a phenomenon known as the Multiple Views effect (MV). 
This effect led to a large body of research focused on mining such subspaces, known as subspace selection. However, as the precise nature of the MV effect was not well understood, traditional methods had to rely on heuristic-driven search schemes that struggle to accurately capture the true structure of the data. Properly identifying these subspaces is critical for unsupervised tasks such as outlier detection or clustering, where misrepresenting the underlying data structure can hinder the performance. We introduce Myopic Subspace Theory (MST), a new theoretical framework that mathematically formulates the Multiple Views effect and writes subspace selection as a stochastic optimization problem. Based on MST, we introduce V-GAN, a generative method trained to solve such an optimization problem. This approach avoids any exhaustive search over the feature space while ensuring that the intrinsic data structure is preserved. Experiments on 42 real-world datasets show that using V-GAN subspaces to build ensemble methods leads to a significant increase in one-class classification performance -- compared to existing subspace selection, feature selection, and embedding methods. Further experiments on synthetic data show that V-GAN identifies subspaces more accurately while scaling better than other relevant subspace selection methods. These results confirm the theoretical guarantees of our approach and also highlight its practical viability in high-dimensional settings. 
</p> </div> </dd> </dl> <dl id='articles'> <h3>Replacement submissions (showing 5 of 5 entries)</h3> <dt> <a name='item7'>[7]</a> <a href ="/abs/2404.15764" title="Abstract" id="2404.15764"> arXiv:2404.15764 </a> (replaced) [<a href="/pdf/2404.15764" title="Download PDF" id="pdf-2404.15764" aria-labelledby="pdf-2404.15764">pdf</a>, <a href="https://arxiv.org/html/2404.15764v5" title="View HTML" id="html-2404.15764" aria-labelledby="html-2404.15764" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2404.15764" title="Other formats" id="oth-2404.15764" aria-labelledby="oth-2404.15764">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Assessment of the quality of a prediction </div> <div class='list-authors'><a href="https://arxiv.org/search/math?searchtype=author&query=Sewell,+R">Roger Sewell</a></div> <div class='list-comments mathjax'><span class='descriptor'>Comments:</span> 16 pages, 3 figures; v5 fixes reference numbering and missing details for reference 13, and author list in metadata </div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Statistics Theory (math.ST)</span>; Methodology (stat.ME) </div> <p class='mathjax'> Shannon defined the mutual information between two variables. We illustrate why the true mutual information between a variable and the predictions made by a prediction algorithm is not a suitable measure of prediction quality, but the apparent Shannon mutual information (ASI) is; indeed it is the unique prediction quality measure with either of two very different lists of desirable properties, as previously shown by de Finetti and other authors. 
However, estimating the uncertainty of the ASI is a difficult problem, because of long and non-symmetric heavy tails to the distribution of the individual values of $j(x,y)=\log\frac{Q_y(x)}{P(x)}$. We propose a Bayesian modelling method for the distribution of $j(x,y)$, from the posterior distribution of which the uncertainty in the ASI can be inferred. This method is based on Dirichlet-based mixtures of skew-Student distributions. We illustrate its use on data from a Bayesian model for prediction of the recurrence time of prostate cancer. We believe that this approach is generally appropriate for most problems, where it is infeasible to derive the explicit distribution of the samples of $j(x,y)$, though the precise modelling parameters may need adjustment to suit particular cases. </p> </div> </dd> <dt> <a name='item8'>[8]</a> <a href ="/abs/2408.04359" title="Abstract" id="2408.04359"> arXiv:2408.04359 </a> (replaced) [<a href="/pdf/2408.04359" title="Download PDF" id="pdf-2408.04359" aria-labelledby="pdf-2408.04359">pdf</a>, <a href="/format/2408.04359" title="Other formats" id="oth-2408.04359" aria-labelledby="oth-2408.04359">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Advances in Bayesian model selection consistency for high-dimensional generalized linear models </div> <div class='list-authors'><a href="https://arxiv.org/search/math?searchtype=author&query=Lee,+J">Jeyong Lee</a>, <a href="https://arxiv.org/search/math?searchtype=author&query=Chae,+M">Minwoo Chae</a>, <a href="https://arxiv.org/search/math?searchtype=author&query=Martin,+R">Ryan Martin</a></div> <div class='list-comments mathjax'><span class='descriptor'>Comments:</span> Accepted to the Annals of Statistics </div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Statistics Theory (math.ST)</span> </div> <p class='mathjax'> Uncovering genuine relationships between a response 
variable of interest and a large collection of covariates is a fundamental and practically important problem. In the context of Gaussian linear models, both the Bayesian and non-Bayesian literature is well-developed and there are no substantial differences in the model selection consistency results available from the two schools. For the more challenging generalized linear models (GLMs), however, Bayesian model selection consistency results are lacking in several ways. In this paper, we construct a Bayesian posterior distribution using an appropriate data-dependent prior and develop its asymptotic concentration properties using new theoretical techniques. In particular, we leverage Spokoiny's powerful non-asymptotic theory to obtain sharp quadratic approximations of the GLM's log-likelihood function, which leads to tight bounds on the errors associated with the model-specific maximum likelihood estimators and the Laplace approximation of our Bayesian marginal likelihood. In turn, these improved bounds lead to significantly stronger, near-optimal Bayesian model selection consistency results, e.g., far weaker beta-min conditions, compared to those available in the existing literature. In particular, our results are applicable to the Poisson regression model, in which the score function is not sub-Gaussian. 
</p> </div> </dd> <dt> <a name='item9'>[9]</a> <a href ="/abs/2410.03041" title="Abstract" id="2410.03041"> arXiv:2410.03041 </a> (replaced) [<a href="/pdf/2410.03041" title="Download PDF" id="pdf-2410.03041" aria-labelledby="pdf-2410.03041">pdf</a>, <a href="https://arxiv.org/html/2410.03041v3" title="View HTML" id="html-2410.03041" aria-labelledby="html-2410.03041" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2410.03041" title="Other formats" id="oth-2410.03041" aria-labelledby="oth-2410.03041">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Minmax Trend Filtering: Generalizations of Total Variation Denoising via a Local Minmax/Maxmin Formula </div> <div class='list-authors'><a href="https://arxiv.org/search/math?searchtype=author&query=Chatterjee,+S">Sabyasachi Chatterjee</a></div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Statistics Theory (math.ST)</span>; Machine Learning (cs.LG) </div> <p class='mathjax'> Total Variation Denoising (TVD) is a fundamental denoising and smoothing method. In this article, we identify a new local minmax/maxmin formula producing two estimators which sandwich the univariate TVD estimator at every point. Operationally, this formula gives a local definition of TVD as a minmax/maxmin of a simple function of local averages. Moreover, we find that this minmax/maxmin formula is generalizable and can be used to define other TVD-like estimators. In this article we propose and study higher order polynomial versions of TVD which are defined pointwise lying between minmax and maxmin optimizations of penalized local polynomial regressions over intervals of different scales. These appear to be new nonparametric regression methods, different from usual Trend Filtering and any other existing method in the nonparametric regression toolbox. We call these estimators Minmax Trend Filtering (MTF). 
We show how the proposed local definition of TVD/MTF estimator makes it tractable to bound pointwise estimation errors in terms of a local bias variance like trade-off. This type of local analysis of TVD/MTF is new and arguably simpler than existing analyses of TVD/Trend Filtering. In particular, apart from minimax rate optimality over bounded variation and piecewise polynomial classes, our pointwise estimation error bounds also enable us to derive local rates of convergence for (locally) Holder Smooth signals. These local rates offer a new pointwise explanation of local adaptivity of TVD/MTF instead of global (MSE) based justifications. </p> </div> </dd> <dt> <a name='item10'>[10]</a> <a href ="/abs/2309.15408" title="Abstract" id="2309.15408"> arXiv:2309.15408 </a> (replaced) [<a href="/pdf/2309.15408" title="Download PDF" id="pdf-2309.15408" aria-labelledby="pdf-2309.15408">pdf</a>, <a href="https://arxiv.org/html/2309.15408v4" title="View HTML" id="html-2309.15408" aria-labelledby="html-2309.15408" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2309.15408" title="Other formats" id="oth-2309.15408" aria-labelledby="oth-2309.15408">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> A smoothed-Bayesian approach to frequency recovery from sketched data </div> <div class='list-authors'><a href="https://arxiv.org/search/stat?searchtype=author&query=Beraha,+M">Mario Beraha</a>, <a href="https://arxiv.org/search/stat?searchtype=author&query=Favaro,+S">Stefano Favaro</a>, <a href="https://arxiv.org/search/stat?searchtype=author&query=Sesia,+M">Matteo Sesia</a></div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Methodology (stat.ME)</span>; Data Structures and Algorithms (cs.DS); Information Retrieval (cs.IR); Statistics Theory (math.ST) </div> <p class='mathjax'> We provide a novel statistical perspective on a classical problem at the 
intersection of computer science and information theory: recovering the empirical frequency of a symbol in a large discrete dataset using only a compressed representation, or sketch, obtained via random hashing. Departing from traditional algorithmic approaches, recent works have proposed Bayesian nonparametric (BNP) methods that can provide more informative frequency estimates by leveraging modeling assumptions about the distribution of the sketched data. In this paper, we propose a smoothed-Bayesian method, inspired by existing BNP approaches but designed in a frequentist framework to overcome the computational limitations of the BNP approaches when dealing with large-scale data from realistic distributions, including those with power-law tail behaviors. For sketches obtained with a single hash function, our approach is supported by rigorous frequentist properties, including unbiasedness and optimality under a squared error loss function within an intuitive class of linear estimators. For sketches with multiple hash functions, we introduce an approach based on multi-view learning to construct computationally efficient frequency estimators. We validate our method on synthetic and real data, comparing its performance to that of existing alternatives. 
</p> </div> </dd> <dt> <a name='item11'>[11]</a> <a href ="/abs/2404.06803" title="Abstract" id="2404.06803"> arXiv:2404.06803 </a> (replaced) [<a href="/pdf/2404.06803" title="Download PDF" id="pdf-2404.06803" aria-labelledby="pdf-2404.06803">pdf</a>, <a href="/format/2404.06803" title="Other formats" id="oth-2404.06803" aria-labelledby="oth-2404.06803">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> A new way to evaluate G-Wishart normalising constants via Fourier analysis </div> <div class='list-authors'><a href="https://arxiv.org/search/stat?searchtype=author&query=Wong,+C">Ching Wong</a>, <a href="https://arxiv.org/search/stat?searchtype=author&query=Moffa,+G">Giusi Moffa</a>, <a href="https://arxiv.org/search/stat?searchtype=author&query=Kuipers,+J">Jack Kuipers</a></div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Methodology (stat.ME)</span>; Statistics Theory (math.ST) </div> <p class='mathjax'> The G-Wishart distribution is an essential component for the Bayesian analysis of Gaussian graphical models as the conjugate prior for the precision matrix. Evaluating the marginal likelihood of such models usually requires computing high-dimensional integrals to determine the G-Wishart normalising constant. Closed-form results are known for decomposable or chordal graphs, while an explicit representation as a formal series expansion has been derived recently for general graphs. The nested infinite sums, however, do not lend themselves to computation, remaining of limited practical value. Borrowing techniques from random matrix theory and Fourier analysis, we provide novel exact results well suited to the numerical evaluation of the normalising constant for classes of graphs beyond chordal graphs. 
</p> </div> </dd> </dl> <div class='paging'>Total of 11 entries </div> <!-- Page-size controls: the href is quoted because its query string contains "=" --> <div class='morefewer'>Showing up to 2000 entries per page: <a href="/list/math.ST/new?skip=0&show=1000" rel="nofollow"> fewer</a> | <span style="color: #454545">more</span> | <span style="color: #454545">all</span> </div> </div> </div> </div> </main> <footer style="clear: both;"> <div class="columns is-desktop" role="navigation" aria-label="Secondary" style="margin: -0.75em -0.75em 0.75em -0.75em">  <div class="column" style="padding: 0;"> <div class="columns"> <div class="column"> <ul style="list-style: none; line-height: 2;"> <li><a href="https://info.arxiv.org/about">About</a></li> <li><a href="https://info.arxiv.org/help">Help</a></li> </ul> </div> <div class="column"> <ul style="list-style: none; line-height: 2;"> <li> <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 512 512" class="icon filter-black" role="presentation"><title>contact arXiv</title><desc>Click here to contact arXiv</desc><path d="M502.3 190.8c3.9-3.1 9.7-.2 9.7 4.7V400c0 26.5-21.5 48-48 48H48c-26.5 0-48-21.5-48-48V195.6c0-5 5.7-7.8 9.7-4.7 22.4 17.4 52.1 39.5 154.1 113.6 21.1 15.4 56.7 47.8 92.2 47.6 35.7.3 72-32.8 92.3-47.6 102-74.1 131.6-96.3 154-113.7zM256 320c23.2.4 56.6-29.2 73.4-41.4 132.7-96.3 142.8-104.7 173.4-128.7 5.8-4.5 9.2-11.5 9.2-18.9v-19c0-26.5-21.5-48-48-48H48C21.5 64 0 85.5 0 112v19c0 7.4 3.4 14.3 9.2 18.9 30.6 23.9 40.7 32.4 173.4 128.7 16.8 12.2 50.2 41.8 73.4 41.4z"/></svg> <a href="https://info.arxiv.org/help/contact.html"> Contact</a> </li> <li> <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 512 512" class="icon filter-black" role="presentation"><title>subscribe to arXiv mailings</title><desc>Click here to subscribe</desc><path d="M476 3.2L12.5 270.6c-18.1 10.4-15.8 35.6 2.2 43.2L121 358.4l287.3-253.2c5.5-4.9 13.3 2.6 8.6 8.3L176 407v80.5c0 23.6 28.5 32.9 42.5 15.8L282 426l124.6 52.2c14.2 6 30.4-2.9 33-18.2l72-432C515 7.8 493.3-6.8 476 3.2z"/></svg> <a 
href="https://info.arxiv.org/help/subscribe"> Subscribe</a> </li> </ul> </div> </div> </div>   <div class="column" style="padding: 0;"> <div class="columns"> <div class="column"> <ul style="list-style: none; line-height: 2;"> <li><a href="https://info.arxiv.org/help/license/index.html">Copyright</a></li> <li><a href="https://info.arxiv.org/help/policies/privacy_policy.html">Privacy Policy</a></li> </ul> </div> <div class="column sorry-app-links"> <ul style="list-style: none; line-height: 2;"> <li><a href="https://info.arxiv.org/help/web_accessibility.html">Web Accessibility Assistance</a></li> <!-- Status links open in a new tab; rel="noopener noreferrer" keeps the opened page from reaching window.opener --> <li> <p class="help"> <a class="a11y-main-link" href="https://status.arxiv.org" target="_blank" rel="noopener noreferrer">arXiv Operational Status <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 256 512" class="icon filter-dark_grey" role="presentation"><path d="M224.3 273l-136 136c-9.4 9.4-24.6 9.4-33.9 0l-22.6-22.6c-9.4-9.4-9.4-24.6 0-33.9l96.4-96.4-96.4-96.4c-9.4-9.4-9.4-24.6 0-33.9L54.3 103c9.4-9.4 24.6-9.4 33.9 0l136 136c9.5 9.4 9.5 24.6.1 34z"/></svg></a><br> Get status notifications via <a class="is-link" href="https://subscribe.sorryapp.com/24846f03/email/new" target="_blank" rel="noopener noreferrer"><svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 512 512" class="icon filter-black" role="presentation"><path d="M502.3 190.8c3.9-3.1 9.7-.2 9.7 4.7V400c0 26.5-21.5 48-48 48H48c-26.5 0-48-21.5-48-48V195.6c0-5 5.7-7.8 9.7-4.7 22.4 17.4 52.1 39.5 154.1 113.6 21.1 15.4 56.7 47.8 92.2 47.6 35.7.3 72-32.8 92.3-47.6 102-74.1 131.6-96.3 154-113.7zM256 320c23.2.4 56.6-29.2 73.4-41.4 132.7-96.3 142.8-104.7 173.4-128.7 5.8-4.5 9.2-11.5 9.2-18.9v-19c0-26.5-21.5-48-48-48H48C21.5 64 0 85.5 0 112v19c0 7.4 3.4 14.3 9.2 18.9 30.6 23.9 40.7 32.4 173.4 128.7 16.8 12.2 50.2 41.8 73.4 41.4z"/></svg>email</a> or <a class="is-link" href="https://subscribe.sorryapp.com/24846f03/slack/new" target="_blank" rel="noopener noreferrer"><svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 448 512" class="icon filter-black" role="presentation"><path d="M94.12 315.1c0 
25.9-21.16 47.06-47.06 47.06S0 341 0 315.1c0-25.9 21.16-47.06 47.06-47.06h47.06v47.06zm23.72 0c0-25.9 21.16-47.06 47.06-47.06s47.06 21.16 47.06 47.06v117.84c0 25.9-21.16 47.06-47.06 47.06s-47.06-21.16-47.06-47.06V315.1zm47.06-188.98c-25.9 0-47.06-21.16-47.06-47.06S139 32 164.9 32s47.06 21.16 47.06 47.06v47.06H164.9zm0 23.72c25.9 0 47.06 21.16 47.06 47.06s-21.16 47.06-47.06 47.06H47.06C21.16 243.96 0 222.8 0 196.9s21.16-47.06 47.06-47.06H164.9zm188.98 47.06c0-25.9 21.16-47.06 47.06-47.06 25.9 0 47.06 21.16 47.06 47.06s-21.16 47.06-47.06 47.06h-47.06V196.9zm-23.72 0c0 25.9-21.16 47.06-47.06 47.06-25.9 0-47.06-21.16-47.06-47.06V79.06c0-25.9 21.16-47.06 47.06-47.06 25.9 0 47.06 21.16 47.06 47.06V196.9zM283.1 385.88c25.9 0 47.06 21.16 47.06 47.06 0 25.9-21.16 47.06-47.06 47.06-25.9 0-47.06-21.16-47.06-47.06v-47.06h47.06zm0-23.72c-25.9 0-47.06-21.16-47.06-47.06 0-25.9 21.16-47.06 47.06-47.06h117.84c25.9 0 47.06 21.16 47.06 47.06 0 25.9-21.16 47.06-47.06 47.06H283.1z"/></svg>slack</a> </p> </li> </ul> </div> </div> </div>   </div> </footer> </div> <script src="/static/base/1.0.1/js/member_acknowledgement.js"></script> </body> </html>

CINXE.COM

Statistics Theory