CINXE.COM

<!DOCTYPE html> <html lang="en"> <head> <title>Methodology </title> <meta name="viewport" content="width=device-width, initial-scale=1"> <link rel="apple-touch-icon" sizes="180x180" href="/static/browse/0.3.4/images/icons/apple-touch-icon.png"> <link rel="icon" type="image/png" sizes="32x32" href="/static/browse/0.3.4/images/icons/favicon-32x32.png"> <link rel="icon" type="image/png" sizes="16x16" href="/static/browse/0.3.4/images/icons/favicon-16x16.png"> <link rel="manifest" href="/static/browse/0.3.4/images/icons/site.webmanifest"> <link rel="mask-icon" href="/static/browse/0.3.4/images/icons/safari-pinned-tab.svg" color="#5bbad5"> <meta name="msapplication-TileColor" content="#da532c"> <meta name="theme-color" content="#ffffff"> <link rel="stylesheet" type="text/css" media="screen" href="/static/browse/0.3.4/css/arXiv.css?v=20241206" /> <link rel="stylesheet" type="text/css" media="print" href="/static/browse/0.3.4/css/arXiv-print.css?v=20200611" /> <link rel="stylesheet" type="text/css" media="screen" href="/static/browse/0.3.4/css/browse_search.css" /> <script language="javascript" src="/static/browse/0.3.4/js/accordion.js" /></script> <script src="/static/browse/0.3.4/js/mathjaxToggle.min.js" type="text/javascript"></script> <script type="text/javascript" language="javascript">mathjaxToggle();</script> </head> <body class="with-cu-identity"> <div class="flex-wrap-footer"> <header> <a href="#content" class="is-sr-only">Skip to main content</a>  <div class="columns is-vcentered is-hidden-mobile" id="cu-identity"> <div class="column" id="cu-logo"> <a href="https://www.cornell.edu/"><img src="/static/browse/0.3.4/images/icons/cu/cornell-reduced-white-SMALL.svg" alt="Cornell University" /></a> </div> <div class="column banner-minimal forum"> <p>arXiv Is Hiring Software Devs</p> <a href="https://info.arxiv.org/hiring/index.html" target="_blank">View Jobs</a> </div><div class="column" id="support-ack"> <span id="support-ack-url">We gratefully acknowledge support 
from the Simons Foundation, <a href="https://info.arxiv.org/about/ourmembers.html">member institutions</a>, and all contributors.</span> <a href="https://info.arxiv.org/about/donate.html" class="btn-header-donate">Donate</a> </div> </div> <div id="header" class="is-hidden-mobile"> <a aria-hidden="true" tabindex="-1" href="/IgnoreMe"></a> <div class="header-breadcrumbs"> <a href="/"><img src="/static/browse/0.3.4/images/arxiv-logo-one-color-white.svg" alt="arxiv logo" style="height:40px;"/></a> <span>></span> <a href="/list/stat.ME/recent">stat.ME</a> </div> <div class="search-block level-right"> <form class="level-item mini-search" method="GET" action="https://arxiv.org/search"> <div class="field has-addons"> <div class="control"> <input class="input is-small" type="text" name="query" placeholder="Search..." aria-label="Search term or terms" /> <p class="help"><a href="https://info.arxiv.org/help">Help</a> | <a href="https://arxiv.org/search/advanced">Advanced Search</a></p> </div> <div class="control"> <div class="select is-small"> <select name="searchtype" aria-label="Field to search"> <option value="all" selected="selected">All fields</option> <option value="title">Title</option> <option value="author">Author</option> <option value="abstract">Abstract</option> <option value="comments">Comments</option> <option value="journal_ref">Journal reference</option> <option value="acm_class">ACM classification</option> <option value="msc_class">MSC classification</option> <option value="report_num">Report number</option> <option value="paper_id">arXiv identifier</option> <option value="doi">DOI</option> <option value="orcid">ORCID</option> <option value="author_id">arXiv author ID</option> <option value="help">Help pages</option> <option value="full_text">Full text</option> </select> </div> </div> <input type="hidden" name="source" value="header"> <button class="button is-small is-cul-darker">Search</button> </div> </form> </div> </div> <div class="mobile-header"> <div 
class="columns is-mobile"> <div class="column logo-arxiv"><a href="https://arxiv.org/"><img src="/static/browse/0.3.4/images/arxiv-logomark-small-white.svg" alt="arXiv logo" style="height:60px;" /></a></div> <div class="column logo-cornell"><a href="https://www.cornell.edu/"> <picture> <source media="(min-width: 501px)" srcset="/static/browse/0.3.4/images/icons/cu/cornell-reduced-white-SMALL.svg 400w" sizes="400w" /> <source srcset="/static/browse/0.3.4/images/icons/cu/cornell_seal_simple_black.svg 2x" /> <img src="/static/browse/0.3.4/images/icons/cu/cornell-reduced-white-SMALL.svg" alt="Cornell University Logo" /> </picture> </a></div> <div class="column nav" id="toggle-container" role="menubar"> <button class="toggle-control"><svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 512 512" class="icon filter-white"><title>open search</title><path d="M505 442.7L405.3 343c-4.5-4.5-10.6-7-17-7H372c27.6-35.3 44-79.7 44-128C416 93.1 322.9 0 208 0S0 93.1 0 208s93.1 208 208 208c48.3 0 92.7-16.4 128-44v16.3c0 6.4 2.5 12.5 7 17l99.7 99.7c9.4 9.4 24.6 9.4 33.9 0l28.3-28.3c9.4-9.4 9.4-24.6.1-34zM208 336c-70.7 0-128-57.2-128-128 0-70.7 57.2-128 128-128 70.7 0 128 57.2 128 128 0 70.7-57.2 128-128 128z"/></svg></button> <div class="mobile-toggle-block toggle-target"> <form class="mobile-search-form" method="GET" action="https://arxiv.org/search"> <div class="field has-addons"> <input class="input" type="text" name="query" placeholder="Search..." 
aria-label="Search term or terms" /> <input type="hidden" name="source" value="header"> <input type="hidden" name="searchtype" value="all"> <button class="button">GO</button> </div> </form> </div> <button class="toggle-control"><svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 448 512" class="icon filter-white" role="menu"><title>open navigation menu</title><path d="M16 132h416c8.837 0 16-7.163 16-16V76c0-8.837-7.163-16-16-16H16C7.163 60 0 67.163 0 76v40c0 8.837 7.163 16 16 16zm0 160h416c8.837 0 16-7.163 16-16v-40c0-8.837-7.163-16-16-16H16c-8.837 0-16 7.163-16 16v40c0 8.837 7.163 16 16 16zm0 160h416c8.837 0 16-7.163 16-16v-40c0-8.837-7.163-16-16-16H16c-8.837 0-16 7.163-16 16v40c0 8.837 7.163 16 16 16z"/></svg></button> <div class="mobile-toggle-block toggle-target"> <nav class="mobile-menu" aria-labelledby="mobilemenulabel"> <h2 id="mobilemenulabel">quick links</h2> <ul> <li><a href="https://arxiv.org/login">Login</a></li> <li><a href="https://info.arxiv.org/help">Help Pages</a></li> <li><a href="https://info.arxiv.org/about">About</a></li> </ul> </nav> </div> </div> </div> </div> </header> <main> <div id="content"> <div id='content-inner'> <div id='dlpage'> <h1>Methodology</h1> <ul> <li><a href="#item0">New submissions</a></li> <li><a href="#item16">Cross-lists</a></li> <li><a href="#item18">Replacements</a></li> </ul> <p>See <a id="recent-stat.ME" aria-labelledby="recent-stat.ME" href="/list/stat.ME/recent">recent</a> articles</p> <h3>Showing new listings for Thursday, 17 April 2025</h3> <div class='paging'>Total of 31 entries </div> <div class='morefewer'>Showing up to 2000 entries per page: <a href="/list/stat.ME/new?skip=0&amp;show=1000" rel="nofollow"> fewer</a> | <span style="color: #454545">more</span> | <span style="color: #454545">all</span> </div> <dl id='articles'> <h3>New submissions (showing 15 of 15 entries)</h3> <dt> <a name='item1'>[1]</a> <a href ="/abs/2504.11550" title="Abstract" id="2504.11550"> arXiv:2504.11550 </a> [<a href="/pdf/2504.11550" 
title="Download PDF" id="pdf-2504.11550" aria-labelledby="pdf-2504.11550">pdf</a>, <a href="https://arxiv.org/html/2504.11550v1" title="View HTML" id="html-2504.11550" aria-labelledby="html-2504.11550" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2504.11550" title="Other formats" id="oth-2504.11550" aria-labelledby="oth-2504.11550">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> A Novel Strategy for Detecting Multiple Mediators in High-Dimensional Mediation Models </div> <div class='list-authors'><a href="https://arxiv.org/search/stat?searchtype=author&query=Yen,+P">Pei-Shan Yen</a>, <a href="https://arxiv.org/search/stat?searchtype=author&query=Sahu,+S">Soumya Sahu</a>, <a href="https://arxiv.org/search/stat?searchtype=author&query=Nandi,+D">Debarghya Nandi</a>, <a href="https://arxiv.org/search/stat?searchtype=author&query=Zhou,+Z">Zhaoliang Zhou</a>, <a href="https://arxiv.org/search/stat?searchtype=author&query=Ajilore,+O">Olusola Ajilore</a>, <a href="https://arxiv.org/search/stat?searchtype=author&query=Bhaumik,+D">Dulal Bhaumik</a></div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Methodology (stat.ME)</span> </div> <p class='mathjax'> This article presents a novel methodology for detecting multiple biomarkers in high-dimensional mediation models by utilizing a modified Least Absolute Shrinkage and Selection Operator (LASSO) alongside Pathway LASSO. This approach effectively addresses the problem of overestimating direct effects, which can result in the inaccurate identification of mediators with nonzero indirect effects. To mitigate this overestimation and improve the true positive rate for detecting mediators, two constraints on the $L_1$-norm penalty are introduced. 
The proposed methodology's effectiveness is demonstrated through extensive simulations across various scenarios, highlighting its robustness and reliability under different conditions. Furthermore, a procedure for selecting an optimal threshold for dimension reduction using sure independence screening is introduced, enhancing the accuracy of true biomarker detection and yielding a final model that is both robust and well-suited for real-world applications. To illustrate the practical utility of this methodology, the results are applied to a study dataset involving patients with internalizing psychopathology, showcasing its applicability in clinical settings. Overall, this methodology signifies a substantial advancement in biomarker detection within high-dimensional mediation models, offering promising implications for both research and clinical practices. </p> </div> </dd> <dt> <a name='item2'>[2]</a> <a href ="/abs/2504.11579" title="Abstract" id="2504.11579"> arXiv:2504.11579 </a> [<a href="/pdf/2504.11579" title="Download PDF" id="pdf-2504.11579" aria-labelledby="pdf-2504.11579">pdf</a>, <a href="https://arxiv.org/html/2504.11579v1" title="View HTML" id="html-2504.11579" aria-labelledby="html-2504.11579" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2504.11579" title="Other formats" id="oth-2504.11579" aria-labelledby="oth-2504.11579">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Mapping Multivariate Phenotypes in the Presence of Missing Observations for Family-Based Data </div> <div class='list-authors'><a href="https://arxiv.org/search/stat?searchtype=author&query=Sahu,+S">Soumya Sahu</a>, <a href="https://arxiv.org/search/stat?searchtype=author&query=Ghosh,+S">Saurabh Ghosh</a></div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Methodology (stat.ME)</span> </div> <p class='mathjax'> Clinical end-point traits are often 
characterized by quantitative or qualitative precursors and it has been argued that it may be statistically a more powerful strategy to analyze these precursor traits to decipher the genetic architecture of the underlying complex end-point trait. While association methods for both quantitative and qualitative traits have been extensively developed to analyze population level data, development of such methods is of current research interest for family-level data that pose additional challenges of incorporation of correlation of trait values within a family. Haldar and Ghosh (2015) developed a test which is the statistical equivalent of the classical TDT for quantitative traits and multivariate phenotypes. The model does not require a priori assumptions on the probability distributions of the phenotypes. However, it may often arise in practice that data on the phenotype of interest may not be available for all offspring in a nuclear family. In this study, we explore methodologies to estimate missing phenotypes conditioned on the available ones and carry out the transmission-based test for association on the 'complete' data. We consider three types of phenotypes: continuous, count and categorical. For a missing continuous phenotype, the trait value is estimated using a conditional normal model. For a missing count phenotype, the trait value is estimated using a conditional Poisson model. For a missing categorical phenotype, the risk of the phenotype status is estimated using a conditional logistic model. We shall carry out simulations under a wide spectrum of genetic models and assess the effect of the proposed imputation strategy on the power of the association test vis-à-vis the ideal situation with no missing data. 
</p> </div> </dd> <dt> <a name='item3'>[3]</a> <a href ="/abs/2504.11583" title="Abstract" id="2504.11583"> arXiv:2504.11583 </a> [<a href="/pdf/2504.11583" title="Download PDF" id="pdf-2504.11583" aria-labelledby="pdf-2504.11583">pdf</a>, <a href="https://arxiv.org/html/2504.11583v1" title="View HTML" id="html-2504.11583" aria-labelledby="html-2504.11583" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2504.11583" title="Other formats" id="oth-2504.11583" aria-labelledby="oth-2504.11583">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Kernel-based Method for Detecting Structural Break in Distribution of Functional Data </div> <div class='list-authors'><a href="https://arxiv.org/search/stat?searchtype=author&query=Sang,+P">Peijun Sang</a>, <a href="https://arxiv.org/search/stat?searchtype=author&query=Li,+B">Bing Li</a></div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Methodology (stat.ME)</span> </div> <p class='mathjax'> We propose a novel method to detect and date structural breaks in the entire distribution of functional data. Theoretical guarantees are developed for our procedure under fewer assumptions than in the existing work. In particular, we establish the asymptotic null distribution of the test statistic, which enables us to test the null hypothesis at a certain significance level. Additionally, the limiting distribution of the estimated structural break date is developed under two situations of the break size: fixed and shrinking towards 0 at a specified rate. We further propose a unified bootstrap procedure to construct a confidence interval for the true structural break date for these two situations. These theoretical results are justified through comprehensive simulation studies in finite samples. 
We apply the proposed method to two real-world examples: Australian temperature data for detecting structural breaks and Canadian weather data for goodness of fit. </p> </div> </dd> <dt> <a name='item4'>[4]</a> <a href ="/abs/2504.11630" title="Abstract" id="2504.11630"> arXiv:2504.11630 </a> [<a href="/pdf/2504.11630" title="Download PDF" id="pdf-2504.11630" aria-labelledby="pdf-2504.11630">pdf</a>, <a href="https://arxiv.org/html/2504.11630v1" title="View HTML" id="html-2504.11630" aria-labelledby="html-2504.11630" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2504.11630" title="Other formats" id="oth-2504.11630" aria-labelledby="oth-2504.11630">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Statistical Modeling of Combinatorial Response Data </div> <div class='list-authors'><a href="https://arxiv.org/search/stat?searchtype=author&query=Zheng,+Y">Yu Zheng</a>, <a href="https://arxiv.org/search/stat?searchtype=author&query=Ghosh,+M">Malay Ghosh</a>, <a href="https://arxiv.org/search/stat?searchtype=author&query=Duan,+L">Leo Duan</a></div> <div class='list-comments mathjax'><span class='descriptor'>Comments:</span> 48 pages, 9 figures, 2 tables </div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Methodology (stat.ME)</span> </div> <p class='mathjax'> In categorical data analysis, there is rich literature for modeling binary and polychotomous responses. However, existing methods are inadequate for handling combinatorial responses, where each response is an array of integers subject to additional constraints. Such data are increasingly common in modern applications, such as surveys collected under skip logic, event propagation on a network, and observed matching in ecology. Ignoring the combinatorial structure in the response data may lead to biased estimation and prediction. 
The fundamental challenge for modeling these integer-vector data is the lack of a link function that connects a linear or functional predictor with a probability respecting the combinatorial constraints. In this paper, we propose a novel augmented likelihood, in which a combinatorial response can be viewed as a deterministic transform of a continuous latent variable. We specify the transform as the maximizer of integer linear program, and characterize useful properties such as dual thresholding representation. When taking a Bayesian approach and considering a multivariate normal distribution for the latent variable, our method becomes a direct generalization to the celebrated probit data augmentation, and enjoys straightforward computation via Gibbs sampler. We provide theoretical justification for the proposed method at an interesting intersection between duality and probability distribution and develop useful sufficient conditions that guarantee the applicability of our method. We demonstrate the effectiveness of our method through simulation studies and a real data application on modeling the formation of seasonal matching between waterfowl. 
</p> </div> </dd> <dt> <a name='item5'>[5]</a> <a href ="/abs/2504.11636" title="Abstract" id="2504.11636"> arXiv:2504.11636 </a> [<a href="/pdf/2504.11636" title="Download PDF" id="pdf-2504.11636" aria-labelledby="pdf-2504.11636">pdf</a>, <a href="https://arxiv.org/html/2504.11636v1" title="View HTML" id="html-2504.11636" aria-labelledby="html-2504.11636" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2504.11636" title="Other formats" id="oth-2504.11636" aria-labelledby="oth-2504.11636">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Scalable Efficient Inference in Complex Surveys through Targeted Resampling of Weights </div> <div class='list-authors'><a href="https://arxiv.org/search/stat?searchtype=author&query=Das,+S">Snigdha Das</a>, <a href="https://arxiv.org/search/stat?searchtype=author&query=Bandyopadhyay,+D">Dipankar Bandyopadhyay</a>, <a href="https://arxiv.org/search/stat?searchtype=author&query=Pati,+D">Debdeep Pati</a></div> <div class='list-comments mathjax'><span class='descriptor'>Comments:</span> 43 pages, 5 figures </div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Methodology (stat.ME)</span> </div> <p class='mathjax'> Survey data often arises from complex sampling designs, such as stratified or multistage sampling, with unequal inclusion probabilities. When sampling is informative, traditional inference methods yield biased estimators and poor coverage. Classical pseudo-likelihood based methods provide accurate asymptotic inference but lack finite-sample uncertainty quantification and the ability to integrate prior information. Existing Bayesian approaches, like the Bayesian pseudo-posterior estimator and weighted Bayesian bootstrap, have limitations; the former struggles with uncertainty quantification, while the latter is computationally intensive and sensitive to bootstrap replicates. 
To address these challenges, we propose the Survey-adjusted Weighted Likelihood Bootstrap (S-WLB), which resamples weights from a carefully chosen distribution centered around the underlying sampling weights. S-WLB is computationally efficient, theoretically consistent, and delivers finite-sample uncertainty intervals which are proven to be asymptotically valid. We demonstrate its performance through simulations and applications to nationally representative survey datasets like NHANES and NSDUH. </p> </div> </dd> <dt> <a name='item6'>[6]</a> <a href ="/abs/2504.11740" title="Abstract" id="2504.11740"> arXiv:2504.11740 </a> [<a href="/pdf/2504.11740" title="Download PDF" id="pdf-2504.11740" aria-labelledby="pdf-2504.11740">pdf</a>, <a href="https://arxiv.org/html/2504.11740v1" title="View HTML" id="html-2504.11740" aria-labelledby="html-2504.11740" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2504.11740" title="Other formats" id="oth-2504.11740" aria-labelledby="oth-2504.11740">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> A cautionary note for plasmode simulation studies in the setting of causal inference </div> <div class='list-authors'><a href="https://arxiv.org/search/stat?searchtype=author&query=Shaw,+P+A">Pamela A Shaw</a>, <a href="https://arxiv.org/search/stat?searchtype=author&query=Gruber,+S">Susan Gruber</a>, <a href="https://arxiv.org/search/stat?searchtype=author&query=Williamson,+B+D">Brian D. Williamson</a>, <a href="https://arxiv.org/search/stat?searchtype=author&query=Desai,+R">Rishi Desai</a>, <a href="https://arxiv.org/search/stat?searchtype=author&query=Shortreed,+S+M">Susan M. Shortreed</a>, <a href="https://arxiv.org/search/stat?searchtype=author&query=Krakauer,+C">Chloe Krakauer</a>, <a href="https://arxiv.org/search/stat?searchtype=author&query=Nelson,+J+C">Jennifer C. 
Nelson</a>, <a href="https://arxiv.org/search/stat?searchtype=author&query=van+der+Laan,+M+J">Mark J. van der Laan</a></div> <div class='list-comments mathjax'><span class='descriptor'>Comments:</span> 55 pages, 6 tables, 2 figures, 8 supplemental tables, 4 supplemental figures </div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Methodology (stat.ME)</span> </div> <p class='mathjax'> Plasmode simulation has become an important tool for evaluating the operating characteristics of different statistical methods in complex settings, such as pharmacoepidemiological studies of treatment effectiveness using electronic health records (EHR) data. These studies provide insight into how estimator performance is impacted by challenges including rare events, small sample size, etc., that can indicate which among a set of methods performs best in a real-world dataset. Plasmode simulation combines data resampled from a real-world dataset with synthetic data to generate a known truth for an estimand in realistic data. There are different potential plasmode strategies currently in use. We compare two popular plasmode simulation frameworks. We provide numerical evidence and a theoretical result, which shows that one of these frameworks can cause certain estimators to incorrectly appear overly biased with lower than nominal confidence interval coverage. Detailed simulation studies using both synthetic and real-world EHR data demonstrate that these pitfalls remain at large sample sizes and when analyzing data from a randomized controlled trial. We conclude with guidance for the choice of a plasmode simulation approach that maintains good theoretical properties to allow a fair evaluation of statistical methods while also maintaining the desired similarity to real data. 
</p> </div> </dd> <dt> <a name='item7'>[7]</a> <a href ="/abs/2504.11759" title="Abstract" id="2504.11759"> arXiv:2504.11759 </a> [<a href="/pdf/2504.11759" title="Download PDF" id="pdf-2504.11759" aria-labelledby="pdf-2504.11759">pdf</a>, <a href="https://arxiv.org/html/2504.11759v1" title="View HTML" id="html-2504.11759" aria-labelledby="html-2504.11759" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2504.11759" title="Other formats" id="oth-2504.11759" aria-labelledby="oth-2504.11759">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Bringing closure to FDR control: beating the e-Benjamini-Hochberg procedure </div> <div class='list-authors'><a href="https://arxiv.org/search/stat?searchtype=author&query=Xu,+Z">Ziyu Xu</a>, <a href="https://arxiv.org/search/stat?searchtype=author&query=Ramdas,+A">Aaditya Ramdas</a></div> <div class='list-comments mathjax'><span class='descriptor'>Comments:</span> 11 pages, 1 figure </div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Methodology (stat.ME)</span>; Statistics Theory (math.ST) </div> <p class='mathjax'> False discovery rate (FDR) has been a key metric for error control in multiple hypothesis testing, and many methods have been developed for FDR control across a diverse cross-section of settings and applications. We develop a closure principle for all FDR controlling procedures, i.e., we provide a characterization based on e-values for all admissible FDR controlling procedures. We leverage this idea to formulate the closed eBH procedure, a (usually strict) improvement over the eBH procedure for FDR control when provided with e-values. We demonstrate the practical performance of closed eBH in simulations. 
</p> </div> </dd> <dt> <a name='item8'>[8]</a> <a href ="/abs/2504.11767" title="Abstract" id="2504.11767"> arXiv:2504.11767 </a> [<a href="/pdf/2504.11767" title="Download PDF" id="pdf-2504.11767" aria-labelledby="pdf-2504.11767">pdf</a>, <a href="https://arxiv.org/html/2504.11767v1" title="View HTML" id="html-2504.11767" aria-labelledby="html-2504.11767" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2504.11767" title="Other formats" id="oth-2504.11767" aria-labelledby="oth-2504.11767">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Post-selection Inference in Regression Models for Group Testing Data </div> <div class='list-authors'><a href="https://arxiv.org/search/stat?searchtype=author&query=Shen,+Q">Qinyan Shen</a>, <a href="https://arxiv.org/search/stat?searchtype=author&query=Gregory,+K">Karl Gregory</a>, <a href="https://arxiv.org/search/stat?searchtype=author&query=Huang,+X">Xianzheng Huang</a></div> <div class='list-journal-ref'><span class='descriptor'>Journal-ref:</span> Biometrics. 2024 Jul 1;80(3):ujae101 </div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Methodology (stat.ME)</span> </div> <p class='mathjax'> We develop methodology for valid inference after variable selection in logistic regression when the responses are partially observed, that is, when one observes a set of error-prone testing outcomes instead of the true values of the responses. Aiming at selecting important covariates while accounting for missing information in the response data, we apply the expectation-maximization algorithm to compute maximum likelihood estimators subject to LASSO penalization. Subsequent to variable selection, we make inferences on the selected covariate effects by extending post-selection inference methodology based on the polyhedral lemma. 
Empirical evidence from our extensive simulation study suggests that our post-selection inference results are more reliable than those from naive inference methods that use the same data to perform variable selection and inference without adjusting for variable selection. </p> </div> </dd> <dt> <a name='item9'>[9]</a> <a href ="/abs/2504.11836" title="Abstract" id="2504.11836"> arXiv:2504.11836 </a> [<a href="/pdf/2504.11836" title="Download PDF" id="pdf-2504.11836" aria-labelledby="pdf-2504.11836">pdf</a>, <a href="https://arxiv.org/html/2504.11836v1" title="View HTML" id="html-2504.11836" aria-labelledby="html-2504.11836" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2504.11836" title="Other formats" id="oth-2504.11836" aria-labelledby="oth-2504.11836">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Non-centering for discrete-valued state transition models: an application to ESBL-producing E. coli transmission in Malawi </div> <div class='list-authors'><a href="https://arxiv.org/search/stat?searchtype=author&query=Neill,+J">James Neill</a>, <a href="https://arxiv.org/search/stat?searchtype=author&query=Lester,+R">Rebecca Lester</a>, <a href="https://arxiv.org/search/stat?searchtype=author&query=Bakali,+W">Winnie Bakali</a>, <a href="https://arxiv.org/search/stat?searchtype=author&query=Roberts,+G">Gareth Roberts</a>, <a href="https://arxiv.org/search/stat?searchtype=author&query=Feasey,+N">Nicholas Feasey</a>, <a href="https://arxiv.org/search/stat?searchtype=author&query=Chapman,+L+A+C">Lloyd A. C. 
Chapman</a>, <a href="https://arxiv.org/search/stat?searchtype=author&query=Jewell,+C">Chris Jewell</a></div> <div class='list-comments mathjax'><span class='descriptor'>Comments:</span> 18 pages, 8 figures (plus supplementary material with an additional 18 pages, 12 figures) </div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Methodology (stat.ME)</span> </div> <p class='mathjax'> Infectious disease transmission is often modelled by discrete-valued stochastic state-transition processes. Due to a lack of complete data, Bayesian inference for these models often relies on data-augmentation techniques. These techniques are often inefficient or time consuming to implement. We introduce a novel data-augmentation Markov chain Monte Carlo method for discrete-time individual-based epidemic models, which we call the Rippler algorithm. This method uses the transmission model in the proposal step of the Metropolis-Hastings algorithm, rather than in the accept-reject step. We test the Rippler algorithm on simulated data and apply it to data on extended-spectrum beta-lactamase (ESBL)-producing E. coli collected in Blantyre, Malawi. We compare the Rippler algorithm to two other commonly used Bayesian inference methods for partially observed epidemic data, and find that it has a good balance between mixing speed and computational complexity. 
</p> </div> </dd> <dt> <a name='item10'>[10]</a> <a href ="/abs/2504.11848" title="Abstract" id="2504.11848"> arXiv:2504.11848 </a> [<a href="/pdf/2504.11848" title="Download PDF" id="pdf-2504.11848" aria-labelledby="pdf-2504.11848">pdf</a>, <a href="https://arxiv.org/html/2504.11848v1" title="View HTML" id="html-2504.11848" aria-labelledby="html-2504.11848" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2504.11848" title="Other formats" id="oth-2504.11848" aria-labelledby="oth-2504.11848">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Proximal Inference on Population Intervention Indirect Effect </div> <div class='list-authors'><a href="https://arxiv.org/search/stat?searchtype=author&query=Bai,+Y">Yang Bai</a>, <a href="https://arxiv.org/search/stat?searchtype=author&query=Cui,+Y">Yifan Cui</a>, <a href="https://arxiv.org/search/stat?searchtype=author&query=Sun,+B">Baoluo Sun</a></div> <div class='list-comments mathjax'><span class='descriptor'>Comments:</span> 60 pages, 3 figures </div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Methodology (stat.ME)</span>; Statistics Theory (math.ST); Machine Learning (stat.ML) </div> <p class='mathjax'> The population intervention indirect effect (PIIE) is a novel mediation effect representing the indirect component of the population intervention effect. Unlike traditional mediation measures, such as the natural indirect effect, the PIIE holds particular relevance in observational studies involving unethical exposures, when hypothetical interventions that impose harmful exposures are inappropriate. Although prior research has identified PIIE under unmeasured confounders between exposure and outcome, it has not fully addressed the confounding that affects the mediator. 
This study extends the PIIE identification to settings where unmeasured confounders influence exposure-outcome, exposure-mediator, and mediator-outcome relationships. Specifically, we leverage observed covariates as proxy variables for unmeasured confounders, constructing three proximal identification frameworks. Additionally, we characterize the semiparametric efficiency bound and develop multiply robust and locally efficient estimators. To handle high-dimensional nuisance parameters, we propose a debiased machine learning approach that achieves $\sqrt{n}$-consistency and asymptotic normality to estimate the true PIIE values, even when the machine learning estimators for the nuisance functions do not converge at $\sqrt{n}$-rate. In simulations, our estimators demonstrate higher confidence interval coverage rates than conventional methods across various model misspecifications. In a real data application, our approaches reveal an indirect effect of alcohol consumption on depression risk mediated by depersonalization symptoms. 
</p> </div> </dd> <dt> <a name='item11'>[11]</a> <a href ="/abs/2504.11906" title="Abstract" id="2504.11906"> arXiv:2504.11906 </a> [<a href="/pdf/2504.11906" title="Download PDF" id="pdf-2504.11906" aria-labelledby="pdf-2504.11906">pdf</a>, <a href="https://arxiv.org/html/2504.11906v1" title="View HTML" id="html-2504.11906" aria-labelledby="html-2504.11906" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2504.11906" title="Other formats" id="oth-2504.11906" aria-labelledby="oth-2504.11906">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Testing of tempered fractional Brownian motions </div> <div class='list-authors'><a href="https://arxiv.org/search/stat?searchtype=author&query=Macioszek,+K">Katarzyna Macioszek</a>, <a href="https://arxiv.org/search/stat?searchtype=author&query=Sabzikar,+F">Farzad Sabzikar</a>, <a href="https://arxiv.org/search/stat?searchtype=author&query=Burnecki,+K">Krzysztof Burnecki</a></div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Methodology (stat.ME)</span>; Data Analysis, Statistics and Probability (physics.data-an) </div> <p class='mathjax'> We propose here a testing methodology based on the autocovariance, detrended moving average, and time-averaged mean-squared displacement statistics for tempered fractional Brownian motions (TFBMs) which are related to the notions of semi-long range dependence and transient anomalous diffusion. In this framework, we consider three types of TFBMs: two with a tempering factor incorporated into their moving-average representation, and one with a tempering parameter added to the autocorrelation formula. We illustrate their dynamics with the use of quantile lines. Using the proposed methodology, we provide a comprehensive power analysis of the tests. It appears that the tests allow distinguishing between the tempered processes with different Hurst parameters. 
</p> </div> </dd> <dt> <a name='item12'>[12]</a> <a href ="/abs/2504.12085" title="Abstract" id="2504.12085"> arXiv:2504.12085 </a> [<a href="/pdf/2504.12085" title="Download PDF" id="pdf-2504.12085" aria-labelledby="pdf-2504.12085">pdf</a>, <a href="https://arxiv.org/html/2504.12085v1" title="View HTML" id="html-2504.12085" aria-labelledby="html-2504.12085" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2504.12085" title="Other formats" id="oth-2504.12085" aria-labelledby="oth-2504.12085">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Semiparametric Causal Discovery and Inference with Invalid Instruments </div> <div class='list-authors'><a href="https://arxiv.org/search/stat?searchtype=author&query=Zou,+J">Jing Zou</a>, <a href="https://arxiv.org/search/stat?searchtype=author&query=Li,+W">Wei Li</a>, <a href="https://arxiv.org/search/stat?searchtype=author&query=Lin,+W">Wei Lin</a></div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Methodology (stat.ME)</span> </div> <p class='mathjax'> Learning causal relationships among a set of variables, as encoded by a directed acyclic graph, from observational data is complicated by the presence of unobserved confounders. Instrumental variables (IVs) are a popular remedy for this issue, but most existing methods either assume the validity of all IVs or postulate a specific form of relationship, such as a linear model, between the primary variables and the IVs. To overcome these limitations, we introduce a partially linear structural equation model for causal discovery and inference that accommodates potentially invalid IVs and allows for general dependence of the primary variables on the IVs. We establish identification under this semiparametric model by constructing surrogate valid IVs, and develop a finite-sample procedure for estimating the causal structures and effects. 
Theoretically, we show that our procedure consistently learns the causal structures, yields asymptotically normal estimates, and effectively controls the false discovery rate in edge recovery. Simulation studies demonstrate the superiority of our method over existing competitors, and an application to inferring gene regulatory networks in Alzheimer's disease illustrates its usefulness. </p> </div> </dd> <dt> <a name='item13'>[13]</a> <a href ="/abs/2504.12214" title="Abstract" id="2504.12214"> arXiv:2504.12214 </a> [<a href="/pdf/2504.12214" title="Download PDF" id="pdf-2504.12214" aria-labelledby="pdf-2504.12214">pdf</a>, <a href="https://arxiv.org/html/2504.12214v1" title="View HTML" id="html-2504.12214" aria-labelledby="html-2504.12214" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2504.12214" title="Other formats" id="oth-2504.12214" aria-labelledby="oth-2504.12214">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Bayesian random-effects meta-analysis of aggregate data on clinical events </div> <div class='list-authors'><a href="https://arxiv.org/search/stat?searchtype=author&query=R%C3%B6ver,+C">Christian Röver</a>, <a href="https://arxiv.org/search/stat?searchtype=author&query=Wu,+Q">Qiong Wu</a>, <a href="https://arxiv.org/search/stat?searchtype=author&query=Loos,+A">Anja Loos</a>, <a href="https://arxiv.org/search/stat?searchtype=author&query=Friede,+T">Tim Friede</a></div> <div class='list-comments mathjax'><span class='descriptor'>Comments:</span> 20 pages, 8 figures </div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Methodology (stat.ME)</span> </div> <p class='mathjax'> To appreciate intervention effects on rare events, meta-analysis techniques are commonly applied in order to assess the accumulated evidence. 
When it comes to adverse effects in clinical trials, these are often most adequately handled using survival methods. A common-effect model that is able to process data in commonly quoted formats in terms of hazard ratios has been proposed for this purpose by Holzhauer (Stat. Med. 2017; 36(5):723-737). In order to accommodate potential heterogeneity between studies, we have extended the model by Holzhauer to a random-effects approach. The Bayesian model is described in detail, and applications to realistic data sets are discussed along with sensitivity analyses and Monte Carlo simulations to support the conclusions. </p> </div> </dd> <dt> <a name='item14'>[14]</a> <a href ="/abs/2504.12287" title="Abstract" id="2504.12287"> arXiv:2504.12287 </a> [<a href="/pdf/2504.12287" title="Download PDF" id="pdf-2504.12287" aria-labelledby="pdf-2504.12287">pdf</a>, <a href="/format/2504.12287" title="Other formats" id="oth-2504.12287" aria-labelledby="oth-2504.12287">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Trend Filtered Mixture of Experts for Automated Gating of High-Frequency Flow Cytometry Data </div> <div class='list-authors'><a href="https://arxiv.org/search/stat?searchtype=author&query=Hyun,+S">Sangwon Hyun</a>, <a href="https://arxiv.org/search/stat?searchtype=author&query=Coleman,+T">Tim Coleman</a>, <a href="https://arxiv.org/search/stat?searchtype=author&query=Ribalet,+F">Francois Ribalet</a>, <a href="https://arxiv.org/search/stat?searchtype=author&query=Bien,+J">Jacob Bien</a></div> <div class='list-comments mathjax'><span class='descriptor'>Comments:</span> 23 pages (including supplement), 9 figures (including supplement) </div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Methodology (stat.ME)</span>; Applications (stat.AP); Machine Learning (stat.ML) </div> <p class='mathjax'> Ocean microbes are critical to both ocean ecosystems and the 
global climate. Flow cytometry, which measures cell optical properties in fluid samples, is routinely used in oceanographic research. Despite decades of accumulated data, identifying key microbial populations (a process known as "gating") remains a significant analytical challenge. To address this, we focus on gating multidimensional, high-frequency flow cytometry data collected <em>continuously</em> on board oceanographic research vessels, capturing time- and space-wise variations in the dynamic ocean. Our paper proposes a novel mixture-of-experts model in which both the gating function and the experts are given by trend filtering. The model leverages two key assumptions: (1) Each snapshot of flow cytometry data is a mixture of multivariate Gaussians and (2) the parameters of these Gaussians vary smoothly over time. Our method uses regularization and a constraint to ensure smoothness and that cluster means match biologically distinct microbe types. We demonstrate, using flow cytometry data from the North Pacific Ocean, that our proposed model accurately matches human-annotated gating and corrects significant errors. 
</p> </div> </dd> <dt> <a name='item15'>[15]</a> <a href ="/abs/2504.12288" title="Abstract" id="2504.12288"> arXiv:2504.12288 </a> [<a href="/pdf/2504.12288" title="Download PDF" id="pdf-2504.12288" aria-labelledby="pdf-2504.12288">pdf</a>, <a href="https://arxiv.org/html/2504.12288v1" title="View HTML" id="html-2504.12288" aria-labelledby="html-2504.12288" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2504.12288" title="Other formats" id="oth-2504.12288" aria-labelledby="oth-2504.12288">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> The underlap coefficient as a measure of a biomarker's discriminatory ability </div> <div class='list-authors'><a href="https://arxiv.org/search/stat?searchtype=author&query=Zhang,+Z">Zhaoxi Zhang</a>, <a href="https://arxiv.org/search/stat?searchtype=author&query=Inacio,+V">Vanda Inacio</a>, <a href="https://arxiv.org/search/stat?searchtype=author&query=de+Carvalho,+M">Miguel de Carvalho</a> (for the Alzheimer's Disease Neuroimaging Initiative)</div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Methodology (stat.ME)</span> </div> <p class='mathjax'> The first step in evaluating a potential diagnostic biomarker is to examine the variation in its values across different disease groups. In a three-class disease setting, the volume under the receiver operating characteristic surface and the three-class Youden index are commonly used summary measures of a biomarker's discriminatory ability. However, these measures rely on a stochastic ordering assumption for the distributions of biomarker outcomes across the three groups. This assumption can be restrictive, particularly when covariates are involved, and its violation may lead to incorrect conclusions about a biomarker's ability to distinguish between the three disease classes. 
Even when a stochastic ordering exists, the order may vary across different biomarkers in discovery studies involving dozens or even thousands of candidate biomarkers, complicating automated ranking. To address these challenges and complement existing measures, we propose the underlap coefficient, a novel summary index of a biomarker's ability to distinguish between three (or more) disease groups, and study its properties. Additionally, we introduce Bayesian nonparametric estimators for both the unconditional underlap coefficient and its covariate-specific counterpart. These estimators are broadly applicable to a wide range of biomarkers and populations. A simulation study reveals a good performance of the proposed estimators across a range of conceivable scenarios. We illustrate the proposed approach through an application to an Alzheimer's disease (AD) dataset aimed to assess how four potential AD biomarkers distinguish between individuals with normal cognition, mild impairment, and dementia, and how and if age and gender impact this discriminatory ability. 
</p> </div> </dd> </dl> <dl id='articles'> <h3>Cross submissions (showing 2 of 2 entries)</h3> <dt> <a name='item16'>[16]</a> <a href ="/abs/2503.08746" title="Abstract" id="2503.08746"> arXiv:2503.08746 </a> (cross-list from q-bio.QM) [<a href="/pdf/2503.08746" title="Download PDF" id="pdf-2503.08746" aria-labelledby="pdf-2503.08746">pdf</a>, <a href="/format/2503.08746" title="Other formats" id="oth-2503.08746" aria-labelledby="oth-2503.08746">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> In silico clinical trials in drug development: a systematic review </div> <div class='list-authors'><a href="https://arxiv.org/search/q-bio?searchtype=author&query=Chen,+B">Bohua Chen</a>, <a href="https://arxiv.org/search/q-bio?searchtype=author&query=Schneider,+L+C">Lucia Chantal Schneider</a>, <a href="https://arxiv.org/search/q-bio?searchtype=author&query=R%C3%B6ver,+C">Christian Röver</a>, <a href="https://arxiv.org/search/q-bio?searchtype=author&query=Comets,+E">Emmanuelle Comets</a>, <a href="https://arxiv.org/search/q-bio?searchtype=author&query=Elze,+M+C">Markus Christian Elze</a>, <a href="https://arxiv.org/search/q-bio?searchtype=author&query=Hooker,+A">Andrew Hooker</a>, <a href="https://arxiv.org/search/q-bio?searchtype=author&query=IntHout,+J">Joanna IntHout</a>, <a href="https://arxiv.org/search/q-bio?searchtype=author&query=Jannot,+A">Anne-Sophie Jannot</a>, <a href="https://arxiv.org/search/q-bio?searchtype=author&query=Julkowska,+D">Daria Julkowska</a>, <a href="https://arxiv.org/search/q-bio?searchtype=author&query=Mimouni,+Y">Yanis Mimouni</a>, <a href="https://arxiv.org/search/q-bio?searchtype=author&query=Savelieva,+M">Marina Savelieva</a>, <a href="https://arxiv.org/search/q-bio?searchtype=author&query=Stallard,+N">Nigel Stallard</a>, <a href="https://arxiv.org/search/q-bio?searchtype=author&query=Ursino,+M">Moreno Ursino</a>, <a 
href="https://arxiv.org/search/q-bio?searchtype=author&query=Vandemeulebroecke,+M">Marc Vandemeulebroecke</a>, <a href="https://arxiv.org/search/q-bio?searchtype=author&query=Weber,+S">Sebastian Weber</a>, <a href="https://arxiv.org/search/q-bio?searchtype=author&query=Posch,+M">Martin Posch</a>, <a href="https://arxiv.org/search/q-bio?searchtype=author&query=Zohar,+S">Sarah Zohar</a>, <a href="https://arxiv.org/search/q-bio?searchtype=author&query=Friede,+T">Tim Friede</a></div> <div class='list-comments mathjax'><span class='descriptor'>Comments:</span> 30 pages, 10 figures </div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Quantitative Methods (q-bio.QM)</span>; Methodology (stat.ME) </div> <p class='mathjax'> In the context of clinical research, computational models have received increasing attention over the past decades. In this systematic review, we aimed to provide an overview of the role of so-called in silico clinical trials (ISCTs) in medical applications. Exemplary for the broad field of clinical medicine, we focused on in silico (IS) methods applied in drug development, sometimes also referred to as model informed drug development (MIDD). We searched PubMed and <a href="http://ClinicalTrials.gov" rel="external noopener nofollow" class="link-external link-http">this http URL</a> for published articles and registered clinical trials related to ISCTs. We identified 202 articles and 48 trials, and of these, 76 articles and 19 trials were directly linked to drug development. We extracted information from all 202 articles and 48 clinical trials and conducted a more detailed review of the methods used in the 76 articles that are connected to drug development. Regarding application, most articles and trials focused on cancer and imaging related research while rare and pediatric diseases were only addressed in 18 and 4 studies, respectively. 
While some models were informed combining mechanistic knowledge with clinical or preclinical (in-vivo or in-vitro) data, the majority of models were fully data-driven, illustrating that clinical data is a crucial part in the process of generating synthetic data in ISCTs. Regarding reproducibility, a more detailed analysis revealed that only 24% (18 out of 76) of the articles provided an open-source implementation of the applied models, and in only 20% of the articles the generated synthetic data were publicly available. Despite the widely raised interest, we also found that it is still uncommon for ISCTs to be part of a registered clinical trial and their application is restricted to specific diseases leaving potential benefits of ISCTs not fully exploited. </p> </div> </dd> <dt> <a name='item17'>[17]</a> <a href ="/abs/2504.11609" title="Abstract" id="2504.11609"> arXiv:2504.11609 </a> (cross-list from stat.ML) [<a href="/pdf/2504.11609" title="Download PDF" id="pdf-2504.11609" aria-labelledby="pdf-2504.11609">pdf</a>, <a href="https://arxiv.org/html/2504.11609v1" title="View HTML" id="html-2504.11609" aria-labelledby="html-2504.11609" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2504.11609" title="Other formats" id="oth-2504.11609" aria-labelledby="oth-2504.11609">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Towards Interpretable Deep Generative Models via Causal Representation Learning </div> <div class='list-authors'><a href="https://arxiv.org/search/stat?searchtype=author&query=Moran,+G+E">Gemma E. 
Moran</a>, <a href="https://arxiv.org/search/stat?searchtype=author&query=Aragam,+B">Bryon Aragam</a></div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Machine Learning (stat.ML)</span>; Artificial Intelligence (cs.AI); Machine Learning (cs.LG); Methodology (stat.ME) </div> <p class='mathjax'> Recent developments in generative artificial intelligence (AI) rely on machine learning techniques such as deep learning and generative modeling to achieve state-of-the-art performance across wide-ranging domains. These methods' surprising performance is due in part to their ability to learn implicit "representations" of complex, multi-modal data. Unfortunately, deep neural networks are notoriously black boxes that obscure these representations, making them difficult to interpret or analyze. To resolve these difficulties, one approach is to build new interpretable neural network models from the ground up. This is the goal of the emerging field of causal representation learning (CRL) that uses causality as a vector for building flexible, interpretable, and transferable generative AI. CRL can be seen as a culmination of three intrinsically statistical problems: (i) latent variable models such as factor analysis; (ii) causal graphical models with latent variables; and (iii) nonparametric statistics and deep learning. This paper reviews recent progress in CRL from a statistical perspective, focusing on connections to classical models and statistical and causal identifiability results. This review also highlights key application areas, implementation strategies, and open statistical questions in CRL. 
</p> </div> </dd> </dl> <dl id='articles'> <h3>Replacement submissions (showing 14 of 14 entries)</h3> <dt> <a name='item18'>[18]</a> <a href ="/abs/2311.09446" title="Abstract" id="2311.09446"> arXiv:2311.09446 </a> (replaced) [<a href="/pdf/2311.09446" title="Download PDF" id="pdf-2311.09446" aria-labelledby="pdf-2311.09446">pdf</a>, <a href="https://arxiv.org/html/2311.09446v3" title="View HTML" id="html-2311.09446" aria-labelledby="html-2311.09446" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2311.09446" title="Other formats" id="oth-2311.09446" aria-labelledby="oth-2311.09446">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Scalable simulation-based inference for implicitly defined models using a metamodel for Monte Carlo log-likelihood estimator </div> <div class='list-authors'><a href="https://arxiv.org/search/stat?searchtype=author&query=Park,+J">Joonha Park</a></div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Methodology (stat.ME)</span>; Statistics Theory (math.ST) </div> <p class='mathjax'> Models implicitly defined through a random simulator of a process have become widely used in scientific and industrial applications in recent years. However, simulation-based inference methods for such implicit models, like approximate Bayesian computation (ABC), often scale poorly as data size increases. We develop a scalable inference method for implicitly defined models using a metamodel for the Monte Carlo log-likelihood estimator derived from simulations. This metamodel characterizes both statistical and simulation-based randomness in the distribution of the log-likelihood estimator across different parameter values. Our metamodel-based method quantifies uncertainty in parameter estimation in a principled manner, leveraging the local asymptotic normality of the mean function of the log-likelihood estimator. 
We apply this method to construct accurate confidence intervals for parameters of partially observed Markov process models where the Monte Carlo log-likelihood estimator is obtained using the bootstrap particle filter. We numerically demonstrate that our method enables accurate and highly scalable parameter inference across several examples, including a mechanistic compartment model for infectious diseases. </p> </div> </dd> <dt> <a name='item19'>[19]</a> <a href ="/abs/2311.13017" title="Abstract" id="2311.13017"> arXiv:2311.13017 </a> (replaced) [<a href="/pdf/2311.13017" title="Download PDF" id="pdf-2311.13017" aria-labelledby="pdf-2311.13017">pdf</a>, <a href="https://arxiv.org/html/2311.13017v5" title="View HTML" id="html-2311.13017" aria-labelledby="html-2311.13017" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2311.13017" title="Other formats" id="oth-2311.13017" aria-labelledby="oth-2311.13017">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> W-Kernel and Its Principal Space for Frequentist Evaluation of Bayesian Estimators </div> <div class='list-authors'><a href="https://arxiv.org/search/stat?searchtype=author&query=Iba,+Y">Yukito Iba</a></div> <div class='list-comments mathjax'><span class='descriptor'>Comments:</span> The introductory sections have been revised to clarify the relationship with previous work. The discussion of Bayesian-frequentist duality and the Z matrix has also been revised. The analysis of numerical experiments is substantially extended. The title has been updated </div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Methodology (stat.ME)</span>; Statistical Mechanics (cond-mat.stat-mech); Machine Learning (stat.ML) </div> <p class='mathjax'> Evaluating the variability of posterior estimates is a key aspect of Bayesian model assessment. 
In this study, we focus on the posterior covariance matrix W, defined using the log-likelihood of each observation. Previous studies have examined the role of the principal space of W in Bayesian sensitivity analysis, notably MacEachern and Peruggia (2002) and Thomas et al. (2018). In this work, we show that the principal space of W is also relevant for frequentist evaluation, using the recently proposed Bayesian infinitesimal jackknife (IJ) approximation Giordano and Broderick (2023) as a key tool. We next consider the relationship between the matrix W and the Fisher kernel. We show that the Fisher kernel can be regarded as an approximation to W; the matrix W, in itself, can be interpreted as a reproducing kernel, which we refer to as the W-kernel. Based on this connection, we examine the dual relationship between the W-kernel formulation in the data space and the classical asymptotic formulation in the parameter space. These ideas suggest a form of Bayesian-frequentist duality that emerges through the dual structure of kernel PCA, where posterior and frequentist covariances serve as inner products in their respective spaces. As an application, we consider an approximate bootstrap of posterior means based on posterior samples generated by MCMC. We show that the projection onto the principal space of W facilitates frequentist evaluation, particularly of the higher-order term in this procedure. In one of the appendices, we introduce incomplete Cholesky decomposition as an efficient method for computing the principal space of W and discuss the related concept of representative subsets of the observations. 
</p> </div> </dd> <dt> <a name='item20'>[20]</a> <a href ="/abs/2401.14359" title="Abstract" id="2401.14359"> arXiv:2401.14359 </a> (replaced) [<a href="/pdf/2401.14359" title="Download PDF" id="pdf-2401.14359" aria-labelledby="pdf-2401.14359">pdf</a>, <a href="https://arxiv.org/html/2401.14359v4" title="View HTML" id="html-2401.14359" aria-labelledby="html-2401.14359" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2401.14359" title="Other formats" id="oth-2401.14359" aria-labelledby="oth-2401.14359">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> A Stability Framework for Parameter Selection in the Minimum Covariance Determinant Problem </div> <div class='list-authors'><a href="https://arxiv.org/search/stat?searchtype=author&query=Heng,+Q">Qiang Heng</a>, <a href="https://arxiv.org/search/stat?searchtype=author&query=Shen,+H">Hui Shen</a>, <a href="https://arxiv.org/search/stat?searchtype=author&query=Lange,+K">Kenneth Lange</a></div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Methodology (stat.ME)</span>; Computation (stat.CO) </div> <p class='mathjax'> The Minimum Covariance Determinant (MCD) method is a widely adopted tool for robust estimation and outlier detection. In this paper, we introduce MCD model selection based on the notion of stability. Our best subset method leverages prior best practices such as statistical depths for initialization and concentration steps for subset refinement. Our contribution lies in constructing a bootstrap procedure to estimate the instability of the best subset algorithm. The instability path offers insights into a dataset's inlier/outlier structure and facilitates suitable choice of the subset size. We rigorously benchmark the proposed framework against existing MCD variants and illustrate its practical utility on several real-world datasets. 
</p> </div> </dd> <dt> <a name='item21'>[21]</a> <a href ="/abs/2401.17452" title="Abstract" id="2401.17452"> arXiv:2401.17452 </a> (replaced) [<a href="/pdf/2401.17452" title="Download PDF" id="pdf-2401.17452" aria-labelledby="pdf-2401.17452">pdf</a>, <a href="https://arxiv.org/html/2401.17452v4" title="View HTML" id="html-2401.17452" aria-labelledby="html-2401.17452" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2401.17452" title="Other formats" id="oth-2401.17452" aria-labelledby="oth-2401.17452">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Group-Weighted Conformal Prediction </div> <div class='list-authors'><a href="https://arxiv.org/search/stat?searchtype=author&query=Bhattacharyya,+A">Aabesh Bhattacharyya</a>, <a href="https://arxiv.org/search/stat?searchtype=author&query=Barber,+R+F">Rina Foygel Barber</a></div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Methodology (stat.ME)</span> </div> <p class='mathjax'> Conformal prediction (CP) is a method for constructing a prediction interval around the output of a fitted model, whose validity does not rely on the model being correct--the CP interval offers a coverage guarantee that is distribution-free, but relies on the training data being drawn from the same distribution as the test data. A recent variant, weighted conformal prediction (WCP), reweights the method to allow for covariate shift between the training and test distributions. However, WCP requires knowledge of the nature of the covariate shift--specifically, the likelihood ratio between the test and training covariate distributions. In practice, since this likelihood ratio is estimated rather than known exactly, the coverage guarantee may degrade due to the estimation error. 
In this paper, we consider a special scenario where observations belong to a finite number of groups, and these groups determine the covariate shift between the training and test distributions--for instance, this may arise if the training set is collected via stratified sampling. Our results demonstrate that in this special case, the predictive coverage guarantees of WCP can be drastically improved beyond the bounds given by existing estimation error bounds. </p> </div> </dd> <dt> <a name='item22'>[22]</a> <a href ="/abs/2404.07923" title="Abstract" id="2404.07923"> arXiv:2404.07923 </a> (replaced) [<a href="/pdf/2404.07923" title="Download PDF" id="pdf-2404.07923" aria-labelledby="pdf-2404.07923">pdf</a>, <a href="https://arxiv.org/html/2404.07923v3" title="View HTML" id="html-2404.07923" aria-labelledby="html-2404.07923" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2404.07923" title="Other formats" id="oth-2404.07923" aria-labelledby="oth-2404.07923">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> A Bayesian Estimator of Sample Size </div> <div class='list-authors'><a href="https://arxiv.org/search/stat?searchtype=author&query=Bi,+D">Dehua Bi</a>, <a href="https://arxiv.org/search/stat?searchtype=author&query=Ji,+Y">Yuan Ji</a></div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Methodology (stat.ME)</span> </div> <p class='mathjax'> We consider a Bayesian estimator of sample size (BESS) and an application to oncology dose optimization clinical trials. BESS is built upon three pillars, Sample size, Evidence from observed data, and Confidence in posterior inference. It uses a simple logic of "given the evidence from data, a specific sample size can achieve a degree of confidence in the posterior inference." 
The key distinction between BESS and standard sample size estimation (SSE) is that SSE, typically based on Frequentist inference, specifies the true parameter values in its calculation while BESS assumes possible outcome from the observed data. As a result, the calibration of the sample size is not based on type I or type II error rates, but on posterior probabilities. We demonstrate that BESS leads to a more interpretable statement for investigators, and can easily accommodate prior information as well as sample size re-estimation. We explore its performance in comparison to the standard SSE and demonstrate its usage through a case study of oncology optimization trial. BESS can be applied to general hypothesis tests. An R tool is available at <a href="https://ccte.uchicago.edu/BESS" rel="external noopener nofollow" class="link-external link-https">this https URL</a>. </p> </div> </dd> <dt> <a name='item23'>[23]</a> <a href ="/abs/2405.08759" title="Abstract" id="2405.08759"> arXiv:2405.08759 </a> (replaced) [<a href="/pdf/2405.08759" title="Download PDF" id="pdf-2405.08759" aria-labelledby="pdf-2405.08759">pdf</a>, <a href="https://arxiv.org/html/2405.08759v2" title="View HTML" id="html-2405.08759" aria-labelledby="html-2405.08759" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2405.08759" title="Other formats" id="oth-2405.08759" aria-labelledby="oth-2405.08759">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Optimal Sequential Procedure for Early Detection of Multiple Side Effects </div> <div class='list-authors'><a href="https://arxiv.org/search/stat?searchtype=author&query=Wang,+J">Jiayue Wang</a>, <a href="https://arxiv.org/search/stat?searchtype=author&query=Boukai,+B">Ben Boukai</a></div> <div class='list-comments mathjax'><span class='descriptor'>Comments:</span> A total of 30 pages with 6 Tables and 8 Figures </div> <div class='list-subjects'><span 
class='descriptor'>Subjects:</span> <span class="primary-subject">Methodology (stat.ME)</span>; Applications (stat.AP) </div> <p class='mathjax'> In this paper, we propose an optimal sequential procedure for the early detection of potential side effects resulting from the administration of some treatment (e.g. a vaccine, say). The results presented here extend previous results obtained in Wang and Boukai (2024) who study the single side effect case to the case of two (or more) side effects. While the sequential procedure we employ simultaneously monitors several of the treatment's side effects, the $(\alpha, \beta)$-optimal test we propose does not require any information about the inter-correlation between these potential side effects. However, in all of the subsequent analyses, including the derivations of the exact expressions of the Average Sample Number (ASN), the Power function, and the properties of the post-test (or post-detection) estimators, we accounted specifically, for the correlation between the potential side effects. In the real-life application (such as post-marketing surveillance), the number of available observations is large enough to justify asymptotic analyses of the sequential procedure (testing and post-detection estimation) properties. Accordingly, we also derive the consistency and asymptotic normality of our post-test estimators; results which enable us to also provide (asymptotic, post-detection) confidence intervals for the probabilities of various side-effects. Moreover, to compare two specific side effects, their relative risk plays an important role. We derive the distribution of the estimated relative risk in the asymptotic framework to provide appropriate inference. To illustrate the theoretical results presented, we provide two detailed examples based on the data of side effects on COVID-19 vaccine collected in Nigeria (see Ilori et al. (2022)). 
</p> </div> </dd> <dt> <a name='item24'>[24]</a> <a href ="/abs/2410.05858" title="Abstract" id="2410.05858"> arXiv:2410.05858 </a> (replaced) [<a href="/pdf/2410.05858" title="Download PDF" id="pdf-2410.05858" aria-labelledby="pdf-2410.05858">pdf</a>, <a href="https://arxiv.org/html/2410.05858v2" title="View HTML" id="html-2410.05858" aria-labelledby="html-2410.05858" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2410.05858" title="Other formats" id="oth-2410.05858" aria-labelledby="oth-2410.05858">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Detecting dependence structure: visualization and inference </div> <div class='list-authors'><a href="https://arxiv.org/search/stat?searchtype=author&query=%C4%86miel,+B">Bogdan Ćmiel</a>, <a href="https://arxiv.org/search/stat?searchtype=author&query=Ledwina,+T">Teresa Ledwina</a></div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Methodology (stat.ME)</span> </div> <p class='mathjax'> Identifying dependency between two random variables is a fundamental problem. The clear interpretability and ability of a procedure to provide information on the form of possible dependence is particularly important when exploring dependencies. In this paper, we introduce a novel method that employs a new estimator of the quantile dependence function and pertinent local acceptance regions. This leads to an insightful visualisation and a rigorous evaluation of the underlying dependence structure. We also propose a test of independence of two random variables, pertinent to this new estimator. Our procedures are based on ranks, and we derive a finite-sample theory that guarantees the inferential validity of our solutions at any given sample size. The procedures are simple to implement and computationally efficient. The large sample consistency of the proposed test is also proved. 
We show that, in terms of power, the new test is one of the best statistics for independence testing when considering a wide range of alternative models. Finally, we demonstrate the use of our approach to visualise dependence structure and to detect local departures from independence through analysing some real-world datasets. </p> </div> </dd> <dt> <a name='item25'>[25]</a> <a href ="/abs/2501.18134" title="Abstract" id="2501.18134"> arXiv:2501.18134 </a> (replaced) [<a href="/pdf/2501.18134" title="Download PDF" id="pdf-2501.18134" aria-labelledby="pdf-2501.18134">pdf</a>, <a href="/format/2501.18134" title="Other formats" id="oth-2501.18134" aria-labelledby="oth-2501.18134">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Nonlocal prior mixture-based Bayesian wavelet regression </div> <div class='list-authors'><a href="https://arxiv.org/search/stat?searchtype=author&query=Sanyal,+N">Nilotpal Sanyal</a></div> <div class='list-comments mathjax'><span class='descriptor'>Comments:</span> 25 pages, 6 figures </div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Methodology (stat.ME)</span>; Applications (stat.AP) </div> <p class='mathjax'> We propose a novel Bayesian wavelet regression approach using a three-component spike-and-slab prior for wavelet coefficients, combining a point mass at zero, a moment (MOM) prior, and an inverse moment (IMOM) prior. This flexible prior supports small and large coefficients differently, offering advantages for highly dispersed data where wavelet coefficients span multiple scales. The IMOM prior's heavy tails capture large coefficients, while the MOM prior is better suited for smaller non-zero coefficients. 
Further, our method introduces innovative hyperparameter specifications for mixture probabilities and scale parameters, including generalized logit, hyperbolic secant, and generalized normal decay for probabilities, and double exponential decay for scaling. Hyperparameters are estimated via an empirical Bayes approach, enabling posterior inference tailored to the data. Extensive simulations demonstrate significant performance gains over two-component wavelet methods. Applications to electroencephalography and noisy audio data illustrate the method's utility in capturing complex signal characteristics. We implement our method in an R package NLPwavelet. </p> </div> </dd> <dt> <a name='item26'>[26]</a> <a href ="/abs/2503.22333" title="Abstract" id="2503.22333"> arXiv:2503.22333 </a> (replaced) [<a href="/pdf/2503.22333" title="Download PDF" id="pdf-2503.22333" aria-labelledby="pdf-2503.22333">pdf</a>, <a href="https://arxiv.org/html/2503.22333v2" title="View HTML" id="html-2503.22333" aria-labelledby="html-2503.22333" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2503.22333" title="Other formats" id="oth-2503.22333" aria-labelledby="oth-2503.22333">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> On Bessel's Correction -- Unbiased Sample Variance, the "Bariance," and a Novel Runtime-Optimized Unbiased Sample Variance Estimator </div> <div class='list-authors'><a href="https://arxiv.org/search/stat?searchtype=author&query=Reichel,+F">Felix Reichel</a></div> <div class='list-comments mathjax'><span class='descriptor'>Comments:</span> 18 pages, 6 figures, 6 tables, 5 references, 1 appendix </div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Methodology (stat.ME)</span> </div> <p class='mathjax'> Bessel's correction adjusts the denominator in the sample variance formula from n to n - 1 to produce an unbiased estimator for the 
population variance. This paper includes rigorous derivations, geometric interpretations, and visualizations. It then introduces the concept of 'bariance', an alternative pairwise distances intuition of sample dispersion without an arithmetic mean. Finally, we address practical concerns raised in Rosenthal's article advocating the use of n-based estimates from a more holistic MSE-based viewpoint for pedagogical reasons and in certain practical contexts. Finally, the empirical part using simulation reveals that the run-time of estimating population variance can be significantly shortened when using an algebraically optimized bariance approach using scalar sums to estimate an unbiased variance. </p> </div> </dd> <dt> <a name='item27'>[27]</a> <a href ="/abs/2504.03480" title="Abstract" id="2504.03480"> arXiv:2504.03480 </a> (replaced) [<a href="/pdf/2504.03480" title="Download PDF" id="pdf-2504.03480" aria-labelledby="pdf-2504.03480">pdf</a>, <a href="https://arxiv.org/html/2504.03480v2" title="View HTML" id="html-2504.03480" aria-labelledby="html-2504.03480" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2504.03480" title="Other formats" id="oth-2504.03480" aria-labelledby="oth-2504.03480">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Multivariate Causal Effects: a Bayesian Causal Regression Factor Model </div> <div class='list-authors'><a href="https://arxiv.org/search/stat?searchtype=author&query=Zorzetto,+D">Dafne Zorzetto</a>, <a href="https://arxiv.org/search/stat?searchtype=author&query=Landy,+J">Jenna Landy</a>, <a href="https://arxiv.org/search/stat?searchtype=author&query=Zigler,+C">Corwin Zigler</a>, <a href="https://arxiv.org/search/stat?searchtype=author&query=Parmigiani,+G">Giovanni Parmigiani</a>, <a href="https://arxiv.org/search/stat?searchtype=author&query=De+Vito,+R">Roberta De Vito</a></div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> 
<span class="primary-subject">Methodology (stat.ME)</span> </div> <p class='mathjax'> The impact of wildfire smoke on air quality is a growing concern, contributing to air pollution through a complex mixture of chemical species with important implications for public health. While previous studies have primarily focused on its association with total particulate matter (PM2.5), the causal relationship between wildfire smoke and the chemical composition of PM2.5 remains largely unexplored. Exposure to these chemical mixtures plays a critical role in shaping public health, yet capturing their relationships requires advanced statistical methods capable of modeling the complex dependencies among chemical species. To fill this gap, we propose a Bayesian causal regression factor model that estimates the multivariate causal effects of wildfire smoke on the concentration of 27 chemical species in PM2.5 across the United States. Our approach introduces two key innovations: (i) a causal inference framework for multivariate potential outcomes, and (ii) a novel Bayesian factor model that employs a probit stick-breaking process as prior for treatment-specific factor scores. By focusing on factor scores, our method addresses the missing data challenge common in causal inference and enables a flexible, data-driven characterization of the latent factor structure, which is crucial to capture the complex correlation among multivariate outcomes. Through Monte Carlo simulations, we show the model's accuracy in estimating the causal effects in multivariate outcomes and characterizing the treatment-specific latent structure. Finally, we apply our method to US air quality data, estimating the causal effect of wildfire smoke on 27 chemical species in PM2.5, providing a deeper understanding of their interdependencies. 
</p> </div> </dd> <dt> <a name='item28'>[28]</a> <a href ="/abs/2412.00753" title="Abstract" id="2412.00753"> arXiv:2412.00753 </a> (replaced) [<a href="/pdf/2412.00753" title="Download PDF" id="pdf-2412.00753" aria-labelledby="pdf-2412.00753">pdf</a>, <a href="/format/2412.00753" title="Other formats" id="oth-2412.00753" aria-labelledby="oth-2412.00753">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> The ecological forecast limit revisited: Potential, actual and relative system predictability </div> <div class='list-authors'><a href="https://arxiv.org/search/stat?searchtype=author&query=Wesselkamp,+M">Marieke Wesselkamp</a>, <a href="https://arxiv.org/search/stat?searchtype=author&query=Albrecht,+J">Jakob Albrecht</a>, <a href="https://arxiv.org/search/stat?searchtype=author&query=Pinnington,+E">Ewan Pinnington</a>, <a href="https://arxiv.org/search/stat?searchtype=author&query=Castillo,+W+J">William J. Castillo</a>, <a href="https://arxiv.org/search/stat?searchtype=author&query=Pappenberger,+F">Florian Pappenberger</a>, <a href="https://arxiv.org/search/stat?searchtype=author&query=Dormann,+C+F">Carsten F. Dormann</a></div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Applications (stat.AP)</span>; Data Analysis, Statistics and Probability (physics.data-an); Populations and Evolution (q-bio.PE); Methodology (stat.ME) </div> <p class='mathjax'> Ecological forecasts are model-based statements about currently unknown ecosystem states in time or space. For a model forecast to be useful to inform decision makers, model validation and verification determine adequateness. The measure of forecast goodness that can be translated into a limit up to which a forecast is acceptable is known as the 'forecast limit'. 
While verification in weather forecasting follows strict criteria with established metrics and forecast limits, assessments of ecological forecasting models still remain experiment-specific, and forecast limits are rarely reported. As such, users of ecological forecasts remain uninformed of how far into the future statements can be trusted. In this work, we synthesise existing approaches to define empirical forecast limits in a unified framework for assessing ecological predictability and offer recipes for their computation. We distinguish the model's potential and absolute forecast limit, and show how a benchmark model can help determine its relative forecast limit. The approaches are demonstrated with three case studies from population, ecosystem, and Earth system research. We found that forecast limits can be computed with three requirements: A verification reference, a scoring function, and a predictive error tolerance. Within our framework, forecast limits are defined for practically any ecological forecast and support research on ecological predictability analysis. 
</p> </div> </dd> <dt> <a name='item29'>[29]</a> <a href ="/abs/2501.10675" title="Abstract" id="2501.10675"> arXiv:2501.10675 </a> (replaced) [<a href="/pdf/2501.10675" title="Download PDF" id="pdf-2501.10675" aria-labelledby="pdf-2501.10675">pdf</a>, <a href="/format/2501.10675" title="Other formats" id="oth-2501.10675" aria-labelledby="oth-2501.10675">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Recovering Unobserved Network Links from Aggregated Relational Data: Discussions on Bayesian Latent Surface Modeling and Penalized Regression </div> <div class='list-authors'><a href="https://arxiv.org/search/econ?searchtype=author&query=Tseng,+Y">Yen-hsuan Tseng</a></div> <div class='list-comments mathjax'><span class='descriptor'>Comments:</span> This version was an early preprint of a manuscript currently under peer review and will be revised substantially. It has been withdrawn to avoid confusion during the review process </div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Econometrics (econ.EM)</span>; Applications (stat.AP); Methodology (stat.ME) </div> <p class='mathjax'> Accurate network data are essential in fields such as economics, sociology, and computer science. Aggregated Relational Data (ARD) provides a way to capture network structures using partial data. This article compares two main frameworks for recovering network links from ARD: Bayesian Latent Surface Modeling (BLSM) and Frequentist Penalized Regression (FPR). Using simulation studies and real-world applications, we evaluate their theoretical properties, computational efficiency, and practical utility in domains like financial risk assessment and epidemiology. Key findings emphasize the importance of trait design, privacy considerations, and hybrid modeling approaches to improve scalability and robustness. 
</p> </div> </dd> <dt> <a name='item30'>[30]</a> <a href ="/abs/2502.10545" title="Abstract" id="2502.10545"> arXiv:2502.10545 </a> (replaced) [<a href="/pdf/2502.10545" title="Download PDF" id="pdf-2502.10545" aria-labelledby="pdf-2502.10545">pdf</a>, <a href="https://arxiv.org/html/2502.10545v2" title="View HTML" id="html-2502.10545" aria-labelledby="html-2502.10545" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2502.10545" title="Other formats" id="oth-2502.10545" aria-labelledby="oth-2502.10545">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> E-TRIALS: Empowering Data-Driven Decisions to Enhance Computer-Based Learning Platforms </div> <div class='list-authors'><a href="https://arxiv.org/search/stat?searchtype=author&query=Siedahmed,+A">Abubakir Siedahmed</a>, <a href="https://arxiv.org/search/stat?searchtype=author&query=Pei,+Y">Yanping Pei</a>, <a href="https://arxiv.org/search/stat?searchtype=author&query=Sales,+A+C">Adam C Sales</a>, <a href="https://arxiv.org/search/stat?searchtype=author&query=Heffernan,+N+T">Neil T Heffernan</a>, <a href="https://arxiv.org/search/stat?searchtype=author&query=Gagnon-Bartsch,+J">Johann Gagnon-Bartsch</a>, <a href="https://arxiv.org/search/stat?searchtype=author&query=Zhang,+D">Di Zhang</a>, <a href="https://arxiv.org/search/stat?searchtype=author&query=Schuetze,+B+A">Brendan A. Schuetze</a>, <a href="https://arxiv.org/search/stat?searchtype=author&query=Zengilowski,+A">Allison Zengilowski</a></div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Applications (stat.AP)</span>; Methodology (stat.ME) </div> <p class='mathjax'> Computer-based learning platforms (CBLPs) have become a common medium in schools, transforming how students learn and interact with educational content. 
However, researchers still lack adequate tools to address the diverse set of challenges that students face in these environments. In this paper, we introduce \textbf{Ed-Tech Research Infrastructure to Advance Learning Sciences (E-TRIALS)}, a free tool developed by ASSISTments to help researchers conduct randomized controlled trials in the realm of learning sciences. We describe its features, the types of experiments it supports, and how it can address critical research questions. We showcase E-TRIALS' capabilities through two real-world interventions. Finally, we evaluate the efficacy of interventions using three average treatment effect (ATE) estimators: Student's t-test, regression, and Leave-One-Out Potential outcomes (LOOP). The results demonstrate that the unbiased LOOP estimator can achieve greater precision by adjusting for baseline covariates compared to the Student's t-test. Our work demonstrates the potential of E-TRIALS to advance research and contribute to the development of more effective, inclusive, and adaptive CBLP. The code used for this work is available at <a href="https://osf.io/xp6ch/" rel="external noopener nofollow" class="link-external link-https">this https URL</a>. 
</p> </div> </dd> <dt> <a name='item31'>[31]</a> <a href ="/abs/2504.07722" title="Abstract" id="2504.07722"> arXiv:2504.07722 </a> (replaced) [<a href="/pdf/2504.07722" title="Download PDF" id="pdf-2504.07722" aria-labelledby="pdf-2504.07722">pdf</a>, <a href="https://arxiv.org/html/2504.07722v2" title="View HTML" id="html-2504.07722" aria-labelledby="html-2504.07722" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2504.07722" title="Other formats" id="oth-2504.07722" aria-labelledby="oth-2504.07722">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Relaxing the Markov Requirements on Reinforcement Learning Under Weak Partial Ignorability </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Bleile,+M">MaryLena Bleile</a></div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Machine Learning (cs.LG)</span>; Methodology (stat.ME) </div> <p class='mathjax'> Incomplete data, confounding effects, and violations of the Markov property are interrelated problems which are ubiquitous in Reinforcement Learning applications. We introduce the concept of "partial ignorability" and leverage it to establish a novel convergence theorem for adaptive Reinforcement Learning. This theoretical result relaxes the Markov assumption on the stochastic process underlying conventional $Q$-learning, deploying a generalized form of the Robbins-Monro stochastic approximation theorem to establish optimality. This result has clear downstream implications for most active subfields of Reinforcement Learning, with clear paths for extension to the field of Causal Inference. 
</p> </div> </dd> </dl> <div class='paging'>Total of 31 entries </div> <div class='morefewer'>Showing up to 2000 entries per page: <a href="/list/stat.ME/new?skip=0&show=1000" rel="nofollow"> fewer</a> | <span style="color: #454545">more</span> | <span style="color: #454545">all</span> </div> </div> </div> </div> </main> <footer style="clear: both;"> <div class="columns is-desktop" role="navigation" aria-label="Secondary" style="margin: -0.75em -0.75em 0.75em -0.75em">  <div class="column" style="padding: 0;"> <div class="columns"> <div class="column"> <ul style="list-style: none; line-height: 2;"> <li><a href="https://info.arxiv.org/about">About</a></li> <li><a href="https://info.arxiv.org/help">Help</a></li> </ul> </div> <div class="column"> <ul style="list-style: none; line-height: 2;"> <li> <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 512 512" class="icon filter-black" role="presentation"><title>contact arXiv</title><desc>Click here to contact arXiv</desc><path d="M502.3 190.8c3.9-3.1 9.7-.2 9.7 4.7V400c0 26.5-21.5 48-48 48H48c-26.5 0-48-21.5-48-48V195.6c0-5 5.7-7.8 9.7-4.7 22.4 17.4 52.1 39.5 154.1 113.6 21.1 15.4 56.7 47.8 92.2 47.6 35.7.3 72-32.8 92.3-47.6 102-74.1 131.6-96.3 154-113.7zM256 320c23.2.4 56.6-29.2 73.4-41.4 132.7-96.3 142.8-104.7 173.4-128.7 5.8-4.5 9.2-11.5 9.2-18.9v-19c0-26.5-21.5-48-48-48H48C21.5 64 0 85.5 0 112v19c0 7.4 3.4 14.3 9.2 18.9 30.6 23.9 40.7 32.4 173.4 128.7 16.8 12.2 50.2 41.8 73.4 41.4z"/></svg> <a href="https://info.arxiv.org/help/contact.html"> Contact</a> </li> <li> <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 512 512" class="icon filter-black" role="presentation"><title>subscribe to arXiv mailings</title><desc>Click here to subscribe</desc><path d="M476 3.2L12.5 270.6c-18.1 10.4-15.8 35.6 2.2 43.2L121 358.4l287.3-253.2c5.5-4.9 13.3 2.6 8.6 8.3L176 407v80.5c0 23.6 28.5 32.9 42.5 15.8L282 426l124.6 52.2c14.2 6 30.4-2.9 33-18.2l72-432C515 7.8 493.3-6.8 476 3.2z"/></svg> <a 
href="https://info.arxiv.org/help/subscribe"> Subscribe</a> </li> </ul> </div> </div> </div>   <div class="column" style="padding: 0;"> <div class="columns"> <div class="column"> <ul style="list-style: none; line-height: 2;"> <li><a href="https://info.arxiv.org/help/license/index.html">Copyright</a></li> <li><a href="https://info.arxiv.org/help/policies/privacy_policy.html">Privacy Policy</a></li> </ul> </div> <div class="column sorry-app-links"> <ul style="list-style: none; line-height: 2;"> <li><a href="https://info.arxiv.org/help/web_accessibility.html">Web Accessibility Assistance</a></li> <li> <p class="help"> <a class="a11y-main-link" href="https://status.arxiv.org" target="_blank">arXiv Operational Status <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 256 512" class="icon filter-dark_grey" role="presentation"><path d="M224.3 273l-136 136c-9.4 9.4-24.6 9.4-33.9 0l-22.6-22.6c-9.4-9.4-9.4-24.6 0-33.9l96.4-96.4-96.4-96.4c-9.4-9.4-9.4-24.6 0-33.9L54.3 103c9.4-9.4 24.6-9.4 33.9 0l136 136c9.5 9.4 9.5 24.6.1 34z"/></svg></a><br> Get status notifications via <a class="is-link" href="https://subscribe.sorryapp.com/24846f03/email/new" target="_blank"><svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 512 512" class="icon filter-black" role="presentation"><path d="M502.3 190.8c3.9-3.1 9.7-.2 9.7 4.7V400c0 26.5-21.5 48-48 48H48c-26.5 0-48-21.5-48-48V195.6c0-5 5.7-7.8 9.7-4.7 22.4 17.4 52.1 39.5 154.1 113.6 21.1 15.4 56.7 47.8 92.2 47.6 35.7.3 72-32.8 92.3-47.6 102-74.1 131.6-96.3 154-113.7zM256 320c23.2.4 56.6-29.2 73.4-41.4 132.7-96.3 142.8-104.7 173.4-128.7 5.8-4.5 9.2-11.5 9.2-18.9v-19c0-26.5-21.5-48-48-48H48C21.5 64 0 85.5 0 112v19c0 7.4 3.4 14.3 9.2 18.9 30.6 23.9 40.7 32.4 173.4 128.7 16.8 12.2 50.2 41.8 73.4 41.4z"/></svg>email</a> or <a class="is-link" href="https://subscribe.sorryapp.com/24846f03/slack/new" target="_blank"><svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 448 512" class="icon filter-black" role="presentation"><path d="M94.12 315.1c0 
25.9-21.16 47.06-47.06 47.06S0 341 0 315.1c0-25.9 21.16-47.06 47.06-47.06h47.06v47.06zm23.72 0c0-25.9 21.16-47.06 47.06-47.06s47.06 21.16 47.06 47.06v117.84c0 25.9-21.16 47.06-47.06 47.06s-47.06-21.16-47.06-47.06V315.1zm47.06-188.98c-25.9 0-47.06-21.16-47.06-47.06S139 32 164.9 32s47.06 21.16 47.06 47.06v47.06H164.9zm0 23.72c25.9 0 47.06 21.16 47.06 47.06s-21.16 47.06-47.06 47.06H47.06C21.16 243.96 0 222.8 0 196.9s21.16-47.06 47.06-47.06H164.9zm188.98 47.06c0-25.9 21.16-47.06 47.06-47.06 25.9 0 47.06 21.16 47.06 47.06s-21.16 47.06-47.06 47.06h-47.06V196.9zm-23.72 0c0 25.9-21.16 47.06-47.06 47.06-25.9 0-47.06-21.16-47.06-47.06V79.06c0-25.9 21.16-47.06 47.06-47.06 25.9 0 47.06 21.16 47.06 47.06V196.9zM283.1 385.88c25.9 0 47.06 21.16 47.06 47.06 0 25.9-21.16 47.06-47.06 47.06-25.9 0-47.06-21.16-47.06-47.06v-47.06h47.06zm0-23.72c-25.9 0-47.06-21.16-47.06-47.06 0-25.9 21.16-47.06 47.06-47.06h117.84c25.9 0 47.06 21.16 47.06 47.06 0 25.9-21.16 47.06-47.06 47.06H283.1z"/></svg>slack</a> </p> </li> </ul> </div> </div> </div>   </div> </footer> </div> <script src="/static/base/1.0.1/js/member_acknowledgement.js"></script> </body> </html>