Machine Learning
aria-label="Search term or terms" /> <input type="hidden" name="source" value="header"> <input type="hidden" name="searchtype" value="all"> <button class="button">GO</button> </div> </form> </div> <button class="toggle-control"><svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 448 512" class="icon filter-white" role="menu"><title>open navigation menu</title><path d="M16 132h416c8.837 0 16-7.163 16-16V76c0-8.837-7.163-16-16-16H16C7.163 60 0 67.163 0 76v40c0 8.837 7.163 16 16 16zm0 160h416c8.837 0 16-7.163 16-16v-40c0-8.837-7.163-16-16-16H16c-8.837 0-16 7.163-16 16v40c0 8.837 7.163 16 16 16zm0 160h416c8.837 0 16-7.163 16-16v-40c0-8.837-7.163-16-16-16H16c-8.837 0-16 7.163-16 16v40c0 8.837 7.163 16 16 16z"/ ></svg></button> <div class="mobile-toggle-block toggle-target"> <nav class="mobile-menu" aria-labelledby="mobilemenulabel"> <h2 id="mobilemenulabel">quick links</h2> <ul> <li><a href="https://arxiv.org/login">Login</a></li> <li><a href="https://info.arxiv.org/help">Help Pages</a></li> <li><a href="https://info.arxiv.org/about">About</a></li> </ul> </nav> </div> </div> </div> </div><!-- /end mobile-header --> </header> <main> <div id="content"> <div id='content-inner'> <div id='dlpage'> <h1>Machine Learning</h1> <ul> <li><a href="#item0">New submissions</a></li> <li><a href="#item5">Cross-lists</a></li> <li><a href="#item12">Replacements</a></li> </ul> <p>See <a id="recent-stat.ML" aria-labelledby="recent-stat.ML" href="/list/stat.ML/recent">recent</a> articles</p> <h3>Showing new listings for Friday, 21 March 2025</h3> <div class='paging'>Total of 30 entries </div> <div class='morefewer'>Showing up to 2000 entries per page: <a href=/list/stat.ML/new?skip=0&show=1000 rel="nofollow"> fewer</a> | <span style="color: #454545">more</span> | <span style="color: #454545">all</span> </div> <dl id='articles'> <h3>New submissions (showing 4 of 4 entries)</h3> <dt> <a name='item1'>[1]</a> <a href ="/abs/2503.15582" title="Abstract" id="2503.15582"> arXiv:2503.15582 </a> [<a href="/pdf/2503.15582" title="Download PDF" id="pdf-2503.15582" aria-labelledby="pdf-2503.15582">pdf</a>, <a href="https://arxiv.org/html/2503.15582v1" title="View HTML" id="html-2503.15582" aria-labelledby="html-2503.15582" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2503.15582" title="Other formats" id="oth-2503.15582" aria-labelledby="oth-2503.15582">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Hierarchical clustering with maximum density paths and mixture models </div> <div class='list-authors'><a href="https://arxiv.org/search/stat?searchtype=author&query=Ritzert,+M">Martin Ritzert</a>, <a href="https://arxiv.org/search/stat?searchtype=author&query=Turishcheva,+P">Polina Turishcheva</a>, <a href="https://arxiv.org/search/stat?searchtype=author&query=Hansel,+L">Laura Hansel</a>, <a href="https://arxiv.org/search/stat?searchtype=author&query=Wollenhaupt,+P">Paul Wollenhaupt</a>, <a href="https://arxiv.org/search/stat?searchtype=author&query=Weis,+M">Marissa Weis</a>, <a href="https://arxiv.org/search/stat?searchtype=author&query=Ecker,+A">Alexander Ecker</a></div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Machine Learning (stat.ML)</span>; Machine Learning (cs.LG) </div> <p class='mathjax'> Hierarchical clustering is an effective and interpretable technique for analyzing structure in data, offering a nuanced understanding by revealing insights at multiple scales and resolutions. 
[2] arXiv:2503.15704 [pdf, other]
Title: Tuning Sequential Monte Carlo Samplers via Greedy Incremental Divergence Minimization
Authors: Kyurae Kim, Zuheng Xu, Jacob R. Gardner, Trevor Campbell
Subjects: Machine Learning (stat.ML); Machine Learning (cs.LG); Computation (stat.CO)

The performance of sequential Monte Carlo (SMC) samplers depends heavily on the tuning of the Markov kernels used in the path proposal. For SMC samplers with unadjusted Markov kernels, standard tuning objectives, such as the Metropolis-Hastings acceptance rate or the expected squared jump distance, no longer apply. While stochastic gradient-based end-to-end optimization has been explored for tuning SMC samplers, such approaches often incur excessive training costs, even when tuning just the kernel step sizes. In this work, we propose a general adaptation framework for tuning the Markov kernels in SMC samplers by minimizing the incremental Kullback-Leibler (KL) divergence between the proposal and target paths. For step-size tuning, we provide a gradient- and tuning-free algorithm that applies broadly to kernels such as Langevin Monte Carlo (LMC). We further demonstrate the utility of our approach with a tailored scheme for tuning kinetic LMC in SMC samplers. Our implementations obtain a full schedule of tuned parameters at the cost of a few vanilla SMC runs, a fraction of the cost of gradient-based approaches.
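For orientation, here is a bare-bones tempered SMC sampler with an unadjusted Langevin mutation kernel, the regime the abstract targets, where no acceptance rate exists to guide tuning. The fixed step size and temperature schedule are exactly the quantities the paper proposes to adapt; the Gaussian initial and target densities, and the annealed-importance-sampling-style weighting (a simplification for unadjusted kernels), are placeholder assumptions.

```python
# Tempered SMC with an unadjusted Langevin (ULA) mutation kernel.
import numpy as np

rng = np.random.default_rng(0)
log_p0 = lambda x: -x**2 / 8.0          # initial distribution N(0, 4)
log_p1 = lambda x: -x**2 / 2.0          # target distribution N(0, 1)
grad_log_pi = lambda x, t: (1 - t) * (-x / 4.0) + t * (-x)  # tempered gradient

n, step = 2000, 0.1                     # step size: hand-tuned, not adapted
x = 2.0 * rng.normal(size=n)            # particles drawn from p0
logw = np.zeros(n)
ts = np.linspace(0.0, 1.0, 25)
for t_prev, t in zip(ts[:-1], ts[1:]):
    logw += (t - t_prev) * (log_p1(x) - log_p0(x))    # incremental weights
    w = np.exp(logw - logw.max()); w /= w.sum()
    x = x[rng.choice(n, size=n, p=w)]; logw[:] = 0.0  # multinomial resampling
    x = x + step * grad_log_pi(x, t) + np.sqrt(2 * step) * rng.normal(size=n)
print("std of final particles (target is 1.0):", round(x.std(), 3))
```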
[3] arXiv:2503.16206 [pdf, other]
Title: Interpretable Neural Causal Models with TRAM-DAGs
Authors: Beate Sick, Oliver Dürr
Comments: Accepted at the CLeaR 2025 Conference
Subjects: Machine Learning (stat.ML); Machine Learning (cs.LG)

The ultimate goal of most scientific studies is to understand the underlying causal mechanism between the involved variables. Structural causal models (SCMs) are widely used to represent such causal mechanisms. Given an SCM, causal queries on all three levels of Pearl's causal hierarchy can be answered: $L_1$ observational, $L_2$ interventional, and $L_3$ counterfactual. An essential aspect of modeling the SCM is modeling the dependency of each variable on its causal parents. Traditionally this is done with parametric statistical models, such as linear or logistic regression, which can handle all kinds of data types and yield interpretable models but bear the risk of introducing bias. More recently, neural causal models have emerged, using neural networks (NNs) to model the causal relationships and allowing the estimation of nearly any underlying functional form without bias. However, current neural causal models are generally restricted to continuous variables and do not yield an interpretable form of the causal relationships. Transformation models (TRAMs) range from simple statistical regressions to complex networks and can handle continuous, ordinal, and binary data. Here, we propose to use TRAMs to model the functional relationships in SCMs, allowing us to bridge the gap between interpretability and flexibility in causal modeling. We call this method TRAM-DAG and currently assume that the underlying directed acyclic graph is known. For the fully observed case, we benchmark TRAM-DAGs against state-of-the-art statistical and NN-based causal models. We show that TRAM-DAGs are interpretable while achieving equal or superior performance in queries ranging from $L_1$ to $L_3$ in the causal hierarchy. For the continuous case, TRAM-DAGs allow counterfactual queries for three common causal structures, including unobserved confounding.
[4] arXiv:2503.16382 [pdf, html, other]
Title: Sparse Nonparametric Contextual Bandits
Authors: Hamish Flynn, Julia Olkhovskaya, Paul Rognon-Vael
Comments: 45 pages
Subjects: Machine Learning (stat.ML); Machine Learning (cs.LG)

This paper studies the problem of simultaneously learning relevant features and minimising regret in contextual bandit problems. We introduce and analyse a new class of contextual bandit problems, called sparse nonparametric contextual bandits, in which the expected reward function lies in the linear span of a small unknown set of features that belongs to a known infinite set of candidate features. We consider two notions of sparsity, for which the set of candidate features is either countable or uncountable. Our contribution is two-fold. First, we provide lower bounds on the minimax regret, which show that polynomial dependence on the number of actions is generally unavoidable in this setting. Second, we show that a variant of the Feel-Good Thompson Sampling algorithm enjoys regret bounds that match our lower bounds up to logarithmic factors of the horizon, with logarithmic dependence on the effective number of candidate features. When we apply our results to kernelised and neural contextual bandits, we find that sparsity always enables better regret bounds, as long as the horizon is large enough relative to the sparsity and the number of actions.
Cross submissions (showing 7 of 7 entries)

[5] arXiv:2503.15545 (cross-list from cs.LG) [pdf, other]
Title: Data-Driven Approximation of Binary-State Network Reliability Function: Algorithm Selection and Reliability Thresholds for Large-Scale Systems
Authors: Wei-Chang Yeh
Subjects: Machine Learning (cs.LG); Numerical Analysis (math.NA); Machine Learning (stat.ML)

Network reliability assessment is pivotal for ensuring the robustness of modern infrastructure systems, from power grids to communication networks. While exact reliability computation for binary-state networks is NP-hard, existing approximation methods face critical trade-offs between accuracy, scalability, and data efficiency. This study evaluates 20 machine learning methods across three reliability regimes: full range (0.0-1.0), high reliability (0.9-1.0), and ultra-high reliability (0.99-1.0). We demonstrate that large-scale networks with arc reliability greater than or equal to 0.9 exhibit near-unity system reliability, enabling computational simplifications. Further, we establish a dataset-scale-driven paradigm for algorithm selection: artificial neural networks (ANN) excel with limited data, while polynomial regression (PR) achieves superior accuracy in data-rich environments. Our findings reveal the ANN's test MSE of 7.24E-05 at 30,000 samples and PR's optimal performance (5.61E-05) at 40,000 samples, outperforming traditional Monte Carlo simulations. These insights provide actionable guidelines for balancing accuracy, interpretability, and computational efficiency in reliability engineering, with implications for infrastructure resilience and system optimization.
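A toy version of the algorithm-selection comparison described above: an ANN surrogate versus polynomial regression, scored by test MSE. The "network" here is a 4-arc series system with arc reliabilities in [0.9, 1.0], a synthetic stand-in rather than the paper's benchmark networks or exact model configurations.

```python
# Compare an ANN and polynomial regression as reliability-function surrogates.
import numpy as np
from sklearn.neural_network import MLPRegressor
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import PolynomialFeatures
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error

rng = np.random.default_rng(0)
p = rng.uniform(0.9, 1.0, size=(5000, 4))   # arc reliabilities
y = p.prod(axis=1)                          # toy series-system reliability
X_tr, X_te, y_tr, y_te = train_test_split(p, y, random_state=0)

models = {
    "ANN": MLPRegressor(hidden_layer_sizes=(32, 32), max_iter=2000, random_state=0),
    "PR": make_pipeline(PolynomialFeatures(degree=3), LinearRegression()),
}
for name, model in models.items():
    model.fit(X_tr, y_tr)
    print(name, "test MSE:", mean_squared_error(y_te, model.predict(X_te)))
```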
[6] arXiv:2503.15650 (cross-list from cs.LG) [pdf, html, other]
Title: Survey on Generalization Theory for Graph Neural Networks
Authors: Antonis Vasileiou, Stefanie Jegelka, Ron Levie, Christopher Morris
Subjects: Machine Learning (cs.LG); Artificial Intelligence (cs.AI); Machine Learning (stat.ML)

Message-passing graph neural networks (MPNNs) have emerged as the leading approach for machine learning on graphs, attracting significant attention in recent years. While a large body of work has explored the expressivity of MPNNs, i.e., their ability to separate graphs and approximate functions over them, comparatively less attention has been directed toward their generalization abilities, i.e., making meaningful predictions beyond the training data. Here, we systematically review the existing literature on the generalization abilities of MPNNs. We analyze the strengths and limitations of various studies in these domains, providing insights into their methodologies and findings. Furthermore, we identify potential avenues for future research, aiming to deepen our understanding of the generalization abilities of MPNNs.

[7] arXiv:2503.15922 (cross-list from math.ST) [pdf, other]
Title: General reproducing properties in RKHS with application to derivative and integral operators
Authors: Fatima-Zahrae El-Boukkouri (INSA Toulouse, IMT), Josselin Garnier (CMAP, ASCII), Olivier Roustant (INSA Toulouse, IMT, RT-UQ)
Subjects: Statistics Theory (math.ST); Machine Learning (stat.ML)

In this paper, we generalize the reproducing property in Reproducing Kernel Hilbert Spaces (RKHS). We establish a reproducing property for the closure of the class of combinations of composition operators under minimal conditions. As an application, we improve the existing sufficient conditions for the reproducing property to hold for the derivative operator, as well as for the existence of the mean embedding function. These results extend the scope of applicability of the representer theorem for regularized learning algorithms that involve data for function values, gradients, or any other operator from the considered class.
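For orientation, the classical identities being generalized are the standard RKHS reproducing properties (textbook facts, not the paper's new results): point evaluation, and, for sufficiently smooth kernels $k$, derivative evaluation, act as inner products against kernel sections:

```latex
f(x) = \langle f,\, k(\cdot, x) \rangle_{\mathcal{H}_k}
\qquad\text{and}\qquad
f'(x) = \langle f,\, \partial_x k(\cdot, x) \rangle_{\mathcal{H}_k}
\qquad\text{for all } f \in \mathcal{H}_k .
```

The paper gives minimal conditions under which analogous identities hold for the closure of the class of combinations of composition operators.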
[8] arXiv:2503.15962 (cross-list from cs.LG) [pdf, other]
Title: Information maximization for a broad variety of multi-armed bandit games
Authors: Alex Barbier-Chebbah (EPIMETHEE), Christian L. Vestergaard (EPIMETHEE), Jean-Baptiste Masson (EPIMETHEE)
Subjects: Machine Learning (cs.LG); Statistical Mechanics (cond-mat.stat-mech); Machine Learning (stat.ML)

Information and free-energy maximization are physics principles that provide general rules for an agent to optimize actions in line with specific goals and policies. These principles are the building blocks for designing decision-making policies capable of efficient performance with only partial information. Notably, the information maximization principle has shown remarkable success in the classical bandit problem and has recently been shown to yield optimal algorithms for Gaussian and sub-Gaussian reward distributions. This article explores a broad extension of physics-based approaches to more complex and structured bandit problems. To this end, we cover three distinct types of bandit problems in which information maximization can be adapted and leads to strong performance. Since the main challenge of information maximization lies in avoiding over-exploration, we highlight how information is tailored at various levels to mitigate this issue, paving the way for more efficient and robust decision-making strategies.
[9] arXiv:2503.15972 (cross-list from cs.LG) [pdf, html, other]
Title: TVineSynth: A Truncated C-Vine Copula Generator of Synthetic Tabular Data to Balance Privacy and Utility
Authors: Elisabeth Griesbauer, Claudia Czado, Arnoldo Frigessi, Ingrid Hobæk Haff
Comments: Accepted at the 28th International Conference on Artificial Intelligence and Statistics (AISTATS 2025)
Subjects: Machine Learning (cs.LG); Machine Learning (stat.ML)

We propose TVineSynth, a vine copula based synthetic tabular data generator designed to balance privacy and utility, using the vine tree structure and its truncation to control the trade-off. Contrary to synthetic data generators that achieve differential privacy (DP) by globally adding noise, TVineSynth performs a controlled approximation of the estimated data-generating distribution, so it does not suffer from poor utility of the resulting synthetic data in downstream prediction tasks. TVineSynth introduces a targeted bias into the vine copula model that, combined with the specific tree structure of the vine, causes the model to zero out privacy-leaking dependencies while relying on those that are beneficial for utility. Privacy is measured with membership inference attacks (MIA) and attribute inference attacks (AIA). Further, we theoretically justify how the construction of TVineSynth ensures AIA privacy under a natural privacy measure for continuous sensitive attributes. When compared to competitor models, with and without DP, on simulated and real-world data, TVineSynth achieves a superior privacy-utility balance.
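A heavily simplified illustration of copula-based tabular synthesis: a single Gaussian copula stands in for TVineSynth's truncated C-vine, and there is no privacy mechanism at all. Only the generic recipe survives: model dependence in copula space, sample, and map back through the marginals. Data and dimensions are made up.

```python
# Gaussian-copula synthetic data (a stand-in for vine-copula generators).
import numpy as np
from scipy import stats

rng = np.random.default_rng(0)
X = rng.gamma(2.0, size=(1000, 3))
X[:, 1] += 0.8 * X[:, 0]                          # induce dependence

U = (stats.rankdata(X, axis=0) - 0.5) / len(X)    # pseudo-observations in (0,1)
corr = np.corrcoef(stats.norm.ppf(U), rowvar=False)  # fit the Gaussian copula

Z_new = rng.multivariate_normal(np.zeros(3), corr, size=1000)
U_new = stats.norm.cdf(Z_new)
X_syn = np.column_stack(                          # back through empirical marginals
    [np.quantile(X[:, j], U_new[:, j]) for j in range(3)])
```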
[10] arXiv:2503.16187 (cross-list from cs.LG) [pdf, html, other]
Title: Manifold learning in metric spaces
Authors: Liane Xu, Amit Singer
Subjects: Machine Learning (cs.LG); Machine Learning (stat.ML)

Laplacian-based methods are popular for dimensionality reduction of data lying in $\mathbb{R}^N$. Several theoretical results for these algorithms depend on the fact that the Euclidean distance approximates the geodesic distance on the underlying submanifold on which the data are assumed to lie. However, for some applications, other metrics, such as the Wasserstein distance, may provide a more appropriate notion of distance than the Euclidean distance. We provide a framework that generalizes the problem of manifold learning to metric spaces and study when a metric satisfies sufficient conditions for the pointwise convergence of the graph Laplacian.
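A sketch of the framework's central object: a graph Laplacian built from an arbitrary metric rather than the Euclidean distance. Any pairwise distance matrix (e.g., Wasserstein distances between distributions) can be plugged in; the Euclidean distances and the bandwidth heuristic below are placeholder choices.

```python
# Graph Laplacian and eigenmap embedding from a generic pairwise metric.
import numpy as np
from scipy.spatial.distance import pdist, squareform

X = np.random.default_rng(0).normal(size=(200, 3))
D = squareform(pdist(X))              # swap in any metric here
eps = np.median(D) ** 2               # kernel bandwidth (heuristic)
W = np.exp(-D**2 / eps)               # Gaussian weights on the metric
L = np.diag(W.sum(axis=1)) - W        # unnormalized graph Laplacian

eigvals, eigvecs = np.linalg.eigh(L)
embedding = eigvecs[:, 1:3]           # Laplacian-eigenmap coordinates
```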
[11] arXiv:2503.16222 (cross-list from stat.CO) [pdf, html, other]
Title: Efficient Bayesian Computation Using Plug-and-Play Priors for Poisson Inverse Problems
Authors: Teresa Klatzer, Savvas Melidonis, Marcelo Pereyra, Konstantinos C. Zygalakis
Comments: 31 pages, 17 figures
Subjects: Computation (stat.CO); Computer Vision and Pattern Recognition (cs.CV); Numerical Analysis (math.NA); Machine Learning (stat.ML)

This paper introduces a novel plug-and-play (PnP) Langevin sampling methodology for Bayesian inference in low-photon Poisson imaging problems, a challenging class of problems with significant applications in astronomy, medicine, and biology. PnP Langevin sampling algorithms offer a powerful framework for Bayesian image restoration, enabling accurate point estimation as well as advanced inference tasks, including uncertainty quantification and visualization analyses, and empirical Bayesian inference for automatic model parameter tuning. However, existing PnP Langevin algorithms are not well suited to low-photon Poisson imaging due to high solution uncertainty and poor regularity properties, such as exploding gradients and non-negativity constraints. To address these challenges, we propose two strategies for extending Langevin PnP sampling to Poisson imaging models: (i) an accelerated PnP Langevin method that incorporates boundary reflections and a Poisson likelihood approximation, and (ii) a mirror sampling algorithm that leverages a Riemannian geometry to handle the constraints and the poor regularity of the likelihood without approximations. The effectiveness of these approaches is demonstrated through extensive numerical experiments and comparisons with state-of-the-art methods.

Replacement submissions (showing 19 of 19 entries)

[12] arXiv:2110.01729 (replaced) [pdf, other]
Title: Stochastic tensor space feature theory with applications to robust machine learning
Authors: Julio Enrique Castrillon-Candas, Dingning Liu, Sicheng Yang, Xiaoling Zhang, Mark Kon
Subjects: Machine Learning (stat.ML); Machine Learning (cs.LG)

In this paper we develop a Multilevel Orthogonal Subspace (MOS) Karhunen-Loeve feature theory based on stochastic tensor spaces for the construction of robust machine learning features. Training data are treated as instances of a random field within a relevant Bochner space. Our key observation is that separate machine learning classes can reside predominantly in mostly distinct subspaces. Using the Karhunen-Loeve expansion and a hierarchical expansion of the first (nominal) class, a MOS is constructed to detect anomalous signal components, treating the second class as an outlier of the first. The projection coefficients of the input data onto these subspaces are then used to train a machine learning (ML) classifier. These coefficients become new features from which much clearer separation surfaces can arise for the underlying classes. Tests on the blood plasma dataset (Alzheimer's Disease Neuroimaging Initiative) show dramatic increases in accuracy over popular ML methods such as Gradient Boosting, RUS Boost, Random Forest, and (Convolutional) Neural Networks.
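The flavor of the MOS construction, compressed into plain PCA purely for illustration: build a Karhunen-Loeve-style basis from the nominal class only, then feed projection coefficients plus residual energy (how far a point sits outside the nominal subspace) to an ordinary classifier. The data and dimensions are fabricated placeholders, not the paper's multilevel construction.

```python
# Subspace-projection features from a nominal-class-only PCA basis.
import numpy as np
from sklearn.decomposition import PCA
from sklearn.linear_model import LogisticRegression

rng = np.random.default_rng(0)
X0 = rng.normal(size=(300, 50))              # nominal class
X1 = rng.normal(size=(300, 50)) + 0.4        # "anomalous" second class

pca = PCA(n_components=10).fit(X0)           # basis from class 0 only
def features(X):
    coeffs = pca.transform(X)
    resid = X - pca.inverse_transform(coeffs)
    return np.column_stack([coeffs, np.linalg.norm(resid, axis=1)])

X = np.vstack([X0, X1])
y = np.r_[np.zeros(300), np.ones(300)]
clf = LogisticRegression(max_iter=1000).fit(features(X), y)
print("train accuracy:", clf.score(features(X), y))
```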
[13] arXiv:2302.01701 (replaced) [pdf, html, other]
Title: Assessment of Spatio-Temporal Predictors in the Presence of Missing and Heterogeneous Data
Authors: Daniele Zambon, Cesare Alippi
Subjects: Machine Learning (stat.ML); Machine Learning (cs.LG)

Deep learning approaches achieve outstanding predictive performance in modeling modern data, despite the increasing complexity and scale. However, evaluating the quality of predictive models becomes more challenging, as traditional statistical assumptions often no longer hold. In particular, spatio-temporal data exhibit dependencies across both time and space, often involving nonlinear dynamics, non-stationarities, and missing observations. As a result, advanced predictors such as spatio-temporal graph neural networks require novel evaluation methodologies. This paper introduces a residual correlation analysis framework designed to assess the optimality of spatio-temporal predictive neural models, particularly in scenarios with incomplete and heterogeneous data. By leveraging the principle that residual correlation indicates information not captured by the model, this framework serves as a powerful tool to identify and localize regions in space and time where model performance can be improved. A key advantage of the proposed approach is its ability to operate under minimal assumptions, enabling robust evaluation of deep learning models applied to multivariate time series, even in the presence of missing and heterogeneous data. The methodology employs tailored spatio-temporal graphs to encode sparse spatial and temporal dependencies within the data and utilizes asymptotically distribution-free summary statistics to pinpoint time intervals and spatial regions where the model underperforms. The effectiveness of the proposed residual analysis is demonstrated through validation on both synthetic and real-world scenarios involving state-of-the-art predictive models.
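A toy version of the premise that correlated residuals flag missed structure: a mean-only forecaster applied to an AR(1) series leaves autocorrelated residuals, which a lag-1 correlation statistic detects. The paper develops distribution-free analogues on spatio-temporal graphs; this is only the one-dimensional intuition.

```python
# Residual autocorrelation as a model-adequacy signal.
import numpy as np

rng = np.random.default_rng(0)
n = 2000
y = np.zeros(n)
for t in range(1, n):
    y[t] = 0.8 * y[t - 1] + rng.normal()   # AR(1) data

residuals = y - y.mean()                   # residuals of a constant predictor
r1 = np.corrcoef(residuals[:-1], residuals[1:])[0, 1]
z = r1 * np.sqrt(n)                        # ~N(0,1) if residuals were white
print(f"lag-1 residual correlation {r1:.2f} (z = {z:.0f}): structure missed")
```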
[14] arXiv:2309.02211 (replaced) [pdf, html, other]
Title: Distributionally Robust Learning for Multi-source Unsupervised Domain Adaptation
Authors: Zhenyu Wang, Peter Bühlmann, Zijian Guo
Subjects: Machine Learning (stat.ML); Machine Learning (cs.LG); Methodology (stat.ME)

Empirical risk minimization often performs poorly when the distribution of the target domain differs from those of the source domains. To address such potential distribution shifts, we develop an unsupervised domain adaptation approach that leverages labeled data from multiple source domains and unlabeled data from the target domain. We introduce a distributionally robust model that optimizes an adversarial reward based on the explained variance across a class of target distributions, ensuring generalization to the target domain. We show that the proposed robust model is a weighted average of conditional outcome models from the source domains. This formulation allows us to compute the robust model by aggregating source models, each of which can be estimated with machine learning algorithms of the user's choice, such as random forests, boosting, and neural networks. Additionally, we introduce a bias-correction step to obtain more accurate aggregation weights, which is effective for various machine learning algorithms. Our framework can be interpreted as a distributionally robust federated learning approach that satisfies privacy constraints while providing insights into the importance of each source for prediction on the target domain. The performance of our method is evaluated on both simulated and real data.
[15] arXiv:2411.13922 (replaced) [pdf, html, other]
Title: Exponentially Consistent Nonparametric Linkage-Based Clustering of Data Sequences
Authors: Bhupender Singh, Ananth Ram Rajagopalan, Srikrishna Bhashyam
Subjects: Machine Learning (stat.ML); Information Theory (cs.IT); Machine Learning (cs.LG); Signal Processing (eess.SP)

In this paper, we consider nonparametric clustering of $M$ independent and identically distributed (i.i.d.) data sequences generated from unknown distributions. The distributions of the $M$ data sequences belong to $K$ underlying distribution clusters. Existing results on exponentially consistent nonparametric clustering algorithms, like single linkage-based (SLINK) clustering and $k$-medoids distribution clustering, assume that the maximum intra-cluster distance ($d_L$) is smaller than the minimum inter-cluster distance ($d_H$). First, in the fixed sample size (FSS) setting, we show that exponential consistency can be achieved for SLINK clustering under a less strict assumption, $d_I < d_H$, where $d_I$ is the maximum distance between any two sub-clusters of a cluster that partition the cluster. Note that $d_I < d_L$ in general. Thus, our results show that SLINK is exponentially consistent for a larger class of problems than previously known. In our simulations, we also identify examples where $k$-medoids clustering is unable to find the true clusters, but SLINK is exponentially consistent. We then propose a sequential clustering algorithm, named SLINK-SEQ, based on SLINK, and prove that it is also exponentially consistent. Simulation results show that the SLINK-SEQ algorithm requires a smaller expected number of samples than the FSS SLINK algorithm for the same probability of error.
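A sketch of linkage-based clustering of data sequences in the FSS setting: pairwise distances between empirical distributions, then single-linkage (SLINK) agglomeration. The Kolmogorov-Smirnov statistic is an illustrative choice of distribution distance, not the one fixed by the paper's analysis.

```python
# SLINK clustering of sequences via pairwise distribution distances.
import numpy as np
from scipy.stats import ks_2samp
from scipy.cluster.hierarchy import linkage, fcluster
from scipy.spatial.distance import squareform

rng = np.random.default_rng(0)
seqs = [rng.normal(0, 1, 500) for _ in range(5)] + \
       [rng.normal(3, 1, 500) for _ in range(5)]    # K = 2 true clusters

m = len(seqs)
D = np.zeros((m, m))
for i in range(m):
    for j in range(i + 1, m):
        D[i, j] = D[j, i] = ks_2samp(seqs[i], seqs[j]).statistic

Z = linkage(squareform(D), method="single")         # SLINK
print(fcluster(Z, t=2, criterion="maxclust"))       # recovers the two groups
```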
[16] arXiv:2502.13570 (replaced) [pdf, other]
Title: An Efficient Permutation-Based Kernel Two-Sample Test
Authors: Antoine Chatalic, Marco Letizia, Nicolas Schreuder, Lorenzo Rosasco
Comments: 23 pages, 2 figures
Subjects: Machine Learning (stat.ML); Machine Learning (cs.LG); Statistics Theory (math.ST); Methodology (stat.ME)

Two-sample hypothesis testing, i.e., determining whether two sets of data are drawn from the same distribution, is a fundamental problem in statistics and machine learning with broad scientific applications. In the context of nonparametric testing, the maximum mean discrepancy (MMD) has gained popularity as a test statistic due to its flexibility and strong theoretical foundations. However, its use in large-scale scenarios is plagued by high computational costs. In this work, we use a Nyström approximation of the MMD to design a computationally efficient and practical testing algorithm while preserving statistical guarantees. Our main result is a finite-sample bound on the power of the proposed test for distributions that are sufficiently separated with respect to the MMD. The derived separation rate matches the known minimax optimal rate in this setting. We support our findings with a series of numerical experiments, emphasizing realistic scientific data.
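A minimal permutation two-sample test using an MMD statistic computed from Nyström features (random landmarks), in the spirit of the abstract. The generic landmark construction, Gaussian kernel, and bandwidth below are assumptions for illustration, not the paper's exact estimator or test calibration.

```python
# Permutation MMD test with a Nystrom feature approximation.
import numpy as np

rng = np.random.default_rng(0)

def nystrom_features(Z, landmarks, gamma):
    K_nm = np.exp(-gamma * ((Z[:, None, :] - landmarks[None]) ** 2).sum(-1))
    K_mm = np.exp(-gamma * ((landmarks[:, None, :] - landmarks[None]) ** 2).sum(-1))
    U, s, _ = np.linalg.svd(K_mm)
    return K_nm @ U / np.sqrt(np.maximum(s, 1e-12))  # ~ K_nm K_mm^{-1/2}

def mmd2(F, n):                                      # squared MMD from feature means
    return ((F[:n].mean(axis=0) - F[n:].mean(axis=0)) ** 2).sum()

X = rng.normal(size=(200, 2))
Y = rng.normal(0.3, 1.0, size=(200, 2))              # shifted alternative
Z = np.vstack([X, Y]); n = len(X)
F = nystrom_features(Z, Z[rng.choice(len(Z), 20, replace=False)], gamma=0.5)

stat = mmd2(F, n)
perm = np.array([mmd2(F[rng.permutation(len(Z))], n) for _ in range(500)])
print("permutation p-value:", (np.sum(perm >= stat) + 1) / (len(perm) + 1))
```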
[17] arXiv:2502.15540 (replaced) [pdf, html, other]
Title: Generalization Guarantees for Representation Learning via Data-Dependent Gaussian Mixture Priors
Authors: Milad Sefidgaran, Abdellatif Zaidi, Piotr Krasnowski
Comments: Accepted as a Spotlight Paper at ICLR 2025
Subjects: Machine Learning (stat.ML); Information Theory (cs.IT); Machine Learning (cs.LG)

We establish in-expectation and tail bounds on the generalization error of representation learning type algorithms. The bounds are in terms of the relative entropy between the distribution of the representations extracted from the training and "test" datasets and a data-dependent symmetric prior, i.e., the Minimum Description Length (MDL) of the latent variables for the training and test datasets. Our bounds are shown to reflect the "structure" and "simplicity" of the encoder and significantly improve upon the few existing ones for the studied model. We then use our in-expectation bound to devise a suitable data-dependent regularizer, and we investigate thoroughly the important question of the selection of the prior. We propose a systematic approach to simultaneously learning a data-dependent Gaussian mixture prior and using it as a regularizer. Interestingly, we show that a weighted attention mechanism emerges naturally in this procedure. Our experiments show that our approach outperforms the now popular Variational Information Bottleneck (VIB) method as well as the recent Category-Dependent VIB (CDVIB).
[18] arXiv:2312.00267 (replaced) [pdf, html, other]
Title: Sample Efficient Preference Alignment in LLMs via Active Exploration
Authors: Viraj Mehta, Syrine Belakaria, Vikramjeet Das, Ojash Neopane, Yijia Dai, Ilija Bogunovic, Barbara Engelhardt, Stefano Ermon, Jeff Schneider, Willie Neiswanger
Subjects: Machine Learning (cs.LG); Artificial Intelligence (cs.AI); Machine Learning (stat.ML)

Preference-based feedback is important for many applications in machine learning where evaluation of a reward function is not feasible. Notable recent examples arise in preference alignment for large language models, including in reinforcement learning from human feedback (RLHF) and direct preference optimization (DPO). For many applications of preference alignment, the cost of acquiring human feedback can be substantial. In this work, we take advantage of the fact that one can often choose contexts at which to obtain human feedback in order to most efficiently identify a good policy, and we formalize the setting as an active contextual dueling bandit problem. We propose an active exploration algorithm to efficiently select the data and prove that it has a polynomial worst-case regret bound. We then extend the setting and methodology for practical use in preference alignment of large language models, providing two extensions: an online and an offline approach. Our method outperforms the baselines with limited samples of human preferences on several language models and four real-world datasets, including two new datasets that we contribute to the literature.
[19] arXiv:2312.14141 (replaced) [pdf, html, other]
Title: Quantum Algorithms for the Pathwise Lasso
Authors: Joao F. Doriguello, Debbie Lim, Chi Seng Pun, Patrick Rebentrost, Tushar Vaidya
Comments: 54 pages. v2: several improvements, typos fixed, references added, fixed a bug in Theorem 28, exponentially improved the complexity dependence on the number of observations $n$ for a random Gaussian input matrix; v3: new lower bounds added, published version in Quantum
Subjects: Quantum Physics (quant-ph); Machine Learning (cs.LG); Optimization and Control (math.OC); Machine Learning (stat.ML)

We present a novel quantum high-dimensional linear regression algorithm with an $\ell_1$-penalty based on the classical LARS (Least Angle Regression) pathwise algorithm. Similarly to available classical algorithms for Lasso, our quantum algorithm provides the full regularisation path as the penalty term varies, but quadratically faster per iteration under specific conditions. A quadratic speedup in the number of features $d$ is possible by using the simple quantum minimum-finding subroutine from Dürr and Hoyer (arXiv'96) to obtain the joining time at each iteration. We then improve upon this simple quantum algorithm and obtain a quadratic speedup in both the number of features $d$ and the number of observations $n$ by using the approximate quantum minimum-finding subroutine from Chen and de Wolf (ICALP'23). To do so, we approximately compute the joining times to be searched over by the approximate quantum minimum-finding subroutine. As another main contribution, we prove, via an approximate version of the KKT conditions and a duality gap, that the LARS algorithm (and therefore our quantum algorithm) is robust to errors: it still outputs a path that minimises the Lasso cost function up to a small error if the joining times are only approximately computed. Furthermore, we show that, when the observations are sampled from a Gaussian distribution, our quantum algorithm's complexity depends only polylogarithmically on $n$, exponentially better than the classical LARS algorithm, while keeping the quadratic improvement on $d$.
Moreover, we propose a dequantised version of our quantum algorithm that also retains the polylogarithmic dependence on $n$, albeit with the linear scaling in $d$ of the standard LARS algorithm. Finally, we prove query lower bounds for classical and quantum Lasso algorithms.

[20] arXiv:2401.14161 (replaced) [pdf, other]
Title: Adapting tree-based multiple imputation methods for multi-level data? A simulation study
Authors: Nico Föge, Jakob Schwerter, Ketevan Gurtskaia, Markus Pauly, Philipp Doebler
Subjects: Applications (stat.AP); Machine Learning (stat.ML)

When data have a hierarchical structure, such as students nested within classrooms, ignoring dependencies between observations can compromise the validity of imputation procedures. Standard tree-based imputation methods implicitly assume independence between observations, limiting their applicability in multilevel data settings. Although Multivariate Imputation by Chained Equations (MICE) is widely used for hierarchical data, it has limitations, including sensitivity to model specification and computational complexity. Alternative tree-based approaches have shown promise for individual-level data but remain largely unexplored for hierarchical contexts. In this simulation study, we systematically evaluate the performance of novel tree-based methods (Chained Random Forests and Extreme Gradient Boosting, mixgb) explicitly adapted for multi-level data by incorporating dummy variables indicating cluster membership. We compare these tree-based methods and their adapted versions with traditional MICE imputation in terms of coefficient estimation bias, type I error rates, and statistical power, under different cluster sizes, missingness mechanisms, and missingness rates, using both random intercept and random slope data-generation models. The results show that MICE provides robust and accurate inference for level-2 variables, especially at low missingness rates. However, the adapted boosting approach (mixgb with cluster dummies) consistently outperforms other methods for level-1 variables at higher missingness rates (30%, 50%). For level-2 variables, while MICE retains better power at moderate missingness (30%), adapted boosting becomes superior at high missingness (50%), regardless of the missingness mechanism or cluster size. These findings highlight the potential of appropriately adapted tree-based imputation methods as effective alternatives to conventional MICE in multilevel data analyses.
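A sketch of the adaptation the study evaluates: append cluster-membership dummies before running a tree-based imputer, so the trees can split on cluster. scikit-learn's IterativeImputer with random forests stands in for the paper's Chained Random Forests / mixgb, and the data are simulated placeholders.

```python
# Tree-based imputation with cluster dummies (the "adapted" variant).
import numpy as np
import pandas as pd
from sklearn.experimental import enable_iterative_imputer  # noqa: F401
from sklearn.impute import IterativeImputer
from sklearn.ensemble import RandomForestRegressor

rng = np.random.default_rng(0)
df = pd.DataFrame({"cluster": rng.integers(0, 5, 300),
                   "x": rng.normal(size=300)})
df["y"] = df["cluster"] + df["x"] + rng.normal(size=300)  # random-intercept-like
df.loc[rng.random(300) < 0.3, "y"] = np.nan               # 30% missingness

X = pd.get_dummies(df, columns=["cluster"])               # the adaptation
imputer = IterativeImputer(
    estimator=RandomForestRegressor(n_estimators=50, random_state=0))
X_imputed = imputer.fit_transform(X)
```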
</p> </div> </dd> <dt> <a name='item21'>[21]</a> <a href ="/abs/2406.02413" title="Abstract" id="2406.02413"> arXiv:2406.02413 </a> (replaced) [<a href="/pdf/2406.02413" title="Download PDF" id="pdf-2406.02413" aria-labelledby="pdf-2406.02413">pdf</a>, <a href="https://arxiv.org/html/2406.02413v2" title="View HTML" id="html-2406.02413" aria-labelledby="html-2406.02413" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2406.02413" title="Other formats" id="oth-2406.02413" aria-labelledby="oth-2406.02413">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Variance-Reduced Fast Krasnoselkii-Mann Methods for Finite-Sum Root-Finding Problems </div> <div class='list-authors'><a href="https://arxiv.org/search/math?searchtype=author&query=Tran-Dinh,+Q">Quoc Tran-Dinh</a></div> <div class='list-comments mathjax'><span class='descriptor'>Comments:</span> 31 pages, 2 figures </div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Optimization and Control (math.OC)</span>; Machine Learning (stat.ML) </div> <p class='mathjax'> We propose a new class of fast Krasnoselkii--Mann methods with variance reduction to solve a finite-sum co-coercive equation $Gx = 0$. Our algorithm is single-loop and leverages a new family of unbiased variance-reduced estimators specifically designed for a wider class of root-finding algorithms. Our method achieves both $\mathcal{O}(1/k^2)$ and $o(1/k^2)$ last-iterate convergence rates in terms of $\mathbb{E}[\| Gx^k\|^2]$, where $k$ is the iteration counter and $\mathbb{E}[\cdot]$ is the total expectation. We also establish almost sure $o(1/k^2)$ convergence rates and the almost sure convergence of iterates $\{x^k\}$ to a solution of $Gx=0$. We instantiate our framework for two prominent estimators: SVRG and SAGA. By an appropriate choice of parameters, both variants attain an oracle complexity of $\mathcal{O}(n + n^{2/3}\epsilon^{-1})$ to reach an $\epsilon$-solution, where $n$ represents the number of summands in the finite-sum operator $G$. Furthermore, under $\sigma$-strong quasi-monotonicity, our method achieves a linear convergence rate and an oracle complexity of $\mathcal{O}(n+ \max\{n, n^{2/3}\kappa\} \log(\frac{1}{\epsilon}))$, where $\kappa := L/\sigma$. We extend our approach to solve a class of finite-sum inclusions (possibly nonmonotone), demonstrating that our schemes retain the same theoretical guarantees as in the equation setting. Finally, numerical experiments validate our algorithms and demonstrate their promising performance compared to state-of-the-art methods. 
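</p> <p class='mathjax'> One building block above admits a compact sketch: a damped Krasnoselkii--Mann-style iteration on $T = I - \eta G$ driven by the classical SVRG estimator of the finite-sum operator. The acceleration terms and the parameter choices behind the stated $\mathcal{O}(1/k^2)$ rates are omitted; <code>lam</code>, <code>eta</code>, and the snapshot schedule are illustrative assumptions, not the paper's: </p> <pre>
# Sketch: variance-reduced Krasnoselkii-Mann-style iteration for Gx = 0,
# with G = (1/n) * sum_i G_i and an SVRG-type unbiased estimator.
import numpy as np

def vr_km(G_list, x0, lam=0.5, eta=0.1, epochs=50, seed=0):
    rng = np.random.default_rng(seed)
    n, x = len(G_list), x0.astype(float).copy()
    for _ in range(epochs):
        x_snap = x.copy()
        G_snap = sum(Gi(x_snap) for Gi in G_list) / n   # full pass at snapshot
        for _ in range(n):
            i = rng.integers(n)
            # SVRG estimator: unbiased, E[g] = G(x).
            g = G_list[i](x) - G_list[i](x_snap) + G_snap
            # Damped (KM-style) fixed-point step on T = I - eta*G.
            x = (1.0 - lam) * x + lam * (x - eta * g)
    return x
</pre> <p class='mathjax'>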
</p> </div> </dd> <dt> <a name='item22'>[22]</a> <a href ="/abs/2409.00553" title="Abstract" id="2409.00553"> arXiv:2409.00553 </a> (replaced) [<a href="/pdf/2409.00553" title="Download PDF" id="pdf-2409.00553" aria-labelledby="pdf-2409.00553">pdf</a>, <a href="https://arxiv.org/html/2409.00553v2" title="View HTML" id="html-2409.00553" aria-labelledby="html-2409.00553" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2409.00553" title="Other formats" id="oth-2409.00553" aria-labelledby="oth-2409.00553">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Multi-Output Distributional Fairness via Post-Processing </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Li,+G">Gang Li</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Lin,+Q">Qihang Lin</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Ghosh,+A">Ayush Ghosh</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Yang,+T">Tianbao Yang</a></div> <div class='list-comments mathjax'><span class='descriptor'>Comments:</span> 21 pages, 4 figures </div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Machine Learning (cs.LG)</span>; Artificial Intelligence (cs.AI); Computers and Society (cs.CY); Machine Learning (stat.ML) </div> <p class='mathjax'> Post-processing approaches are becoming prominent techniques for enhancing machine learning models' fairness because of their intuitiveness, low computational cost, and excellent scalability. However, most existing post-processing methods are designed for task-specific fairness measures and are limited to single-output models. In this paper, we introduce a post-processing method for multi-output models, such as the ones used for multi-task/multi-class classification and representation learning, to enhance a model's distributional parity, a task-agnostic fairness measure. Existing methods for achieving distributional parity rely on the (inverse) cumulative density function of a model's output, restricting their applicability to single-output models. Extending previous works, we propose to employ optimal transport mappings to move a model's outputs across different groups towards their empirical Wasserstein barycenter. An approximation technique is applied to reduce the complexity of computing the exact barycenter, and a kernel regression method is proposed to extend this process to out-of-sample data. Our empirical studies evaluate the proposed approach against various baselines on multi-task/multi-class classification and representation learning tasks, demonstrating its effectiveness. 
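</p> <p class='mathjax'> The core post-processing step can be sketched with the POT (Python Optimal Transport) library: transport one group's outputs onto a common reference sample (in the paper, an approximate Wasserstein barycenter of all groups) and replace each output by its barycentric projection under an entropic plan. The barycenter computation itself and the kernel-regression extension to out-of-sample data are omitted here: </p> <pre>
# Sketch: push one group's model outputs toward a common reference sample
# via the barycentric projection of an entropic optimal transport plan.
import numpy as np
import ot  # POT: Python Optimal Transport

def project_to_reference(Y_group, Y_ref, reg=0.05):
    n, m = len(Y_group), len(Y_ref)
    a, b = np.full(n, 1.0 / n), np.full(m, 1.0 / m)  # uniform weights
    M = ot.dist(Y_group, Y_ref)                      # squared Euclidean costs
    P = ot.sinkhorn(a, b, M, reg)                    # entropic OT plan (n x m)
    # Barycentric projection: row-normalised plan applied to the reference.
    return (P / P.sum(axis=1, keepdims=True)) @ Y_ref
</pre> <p class='mathjax'>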
</p> </div> </dd> <dt> <a name='item23'>[23]</a> <a href ="/abs/2409.04982" title="Abstract" id="2409.04982"> arXiv:2409.04982 </a> (replaced) [<a href="/pdf/2409.04982" title="Download PDF" id="pdf-2409.04982" aria-labelledby="pdf-2409.04982">pdf</a>, <a href="https://arxiv.org/html/2409.04982v2" title="View HTML" id="html-2409.04982" aria-labelledby="html-2409.04982" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2409.04982" title="Other formats" id="oth-2409.04982" aria-labelledby="oth-2409.04982">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> 2DSig-Detect: a semi-supervised framework for anomaly detection on image data using 2D-signatures </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Xie,+X">Xinheng Xie</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Yamaguchi,+K">Kureha Yamaguchi</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Leblanc,+M">Margaux Leblanc</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Malzard,+S">Simon Malzard</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Chhabra,+V">Varun Chhabra</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Nockles,+V">Victoria Nockles</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Wu,+Y">Yue Wu</a></div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Computer Vision and Pattern Recognition (cs.CV)</span>; Probability (math.PR); Machine Learning (stat.ML) </div> <p class='mathjax'> The rapid advancement of machine learning technologies raises questions about the security of machine learning models, with respect to both training-time (poisoning) and test-time (evasion, impersonation, and inversion) attacks. Models performing image-related tasks, e.g., detection and classification, are vulnerable to adversarial attacks that can degrade their performance and produce undesirable outcomes. This paper introduces a novel technique for anomaly detection in images called 2DSig-Detect, which uses a 2D-signature-embedded semi-supervised framework rooted in rough path theory. We demonstrate our method in adversarial settings for training-time and test-time attacks, and benchmark our framework against other state-of-the-art methods. Using 2DSig-Detect for anomaly detection, we show both superior performance and a reduction in the computation time to detect the presence of adversarial perturbations in images. 
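</p> <p class='mathjax'> A heavily simplified sketch of the semi-supervised scoring idea, with a hypothetical <code>two_d_signature</code> placeholder standing in for the paper's 2D-signature embedding: fit the empirical mean and covariance of embeddings of clean images, then flag test images whose Mahalanobis-type distance to the training distribution is large: </p> <pre>
# Sketch: semi-supervised anomaly scoring on image embeddings.
# `two_d_signature` is a placeholder for the 2D-signature transform.
import numpy as np

def fit_normal_model(clean_images, embed):
    F = np.stack([embed(img) for img in clean_images])         # (n, d)
    mu = F.mean(axis=0)
    cov = np.cov(F, rowvar=False) + 1e-6 * np.eye(F.shape[1])  # regularised
    return mu, np.linalg.inv(cov)

def anomaly_score(img, embed, mu, cov_inv):
    f = embed(img) - mu
    return float(f @ cov_inv @ f)  # large score: likely anomalous/adversarial

# usage: mu, P = fit_normal_model(train_imgs, two_d_signature)
#        flagged = anomaly_score(x, two_d_signature, mu, P) > threshold
</pre> <p class='mathjax'>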
</p> </div> </dd> <dt> <a name='item24'>[24]</a> <a href ="/abs/2410.07627" title="Abstract" id="2410.07627"> arXiv:2410.07627 </a> (replaced) [<a href="/pdf/2410.07627" title="Download PDF" id="pdf-2410.07627" aria-labelledby="pdf-2410.07627">pdf</a>, <a href="https://arxiv.org/html/2410.07627v2" title="View HTML" id="html-2410.07627" aria-labelledby="html-2410.07627" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2410.07627" title="Other formats" id="oth-2410.07627" aria-labelledby="oth-2410.07627">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Automatic Curriculum Expert Iteration for Reliable LLM Reasoning </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Zhao,+Z">Zirui Zhao</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Dong,+H">Hanze Dong</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Saha,+A">Amrita Saha</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Xiong,+C">Caiming Xiong</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Sahoo,+D">Doyen Sahoo</a></div> <div class='list-comments mathjax'><span class='descriptor'>Comments:</span> 20 pages </div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Machine Learning (cs.LG)</span>; Artificial Intelligence (cs.AI); Computation and Language (cs.CL); Machine Learning (stat.ML) </div> <p class='mathjax'> Hallucinations (i.e., generating plausible but inaccurate content) and laziness (i.e., excessive refusals or defaulting to "I don't know") persist as major challenges in LLM reasoning. Current efforts to reduce hallucinations primarily focus on factual errors in knowledge-grounded tasks, often neglecting hallucinations related to faulty reasoning. Meanwhile, some approaches render LLMs overly conservative, limiting their problem-solving capabilities. To mitigate hallucination and laziness in reasoning tasks, we propose Automatic Curriculum Expert Iteration (Auto-CEI) to enhance LLM reasoning and align responses to the model's capabilities--assertively answering within its limits and declining when tasks exceed them. In our method, Expert Iteration explores the reasoning trajectories near the LLM policy, guiding incorrect paths back on track to reduce compounding errors and improve robustness; it also promotes appropriate "I don't know" responses after sufficient reasoning attempts. The curriculum automatically adjusts rewards, incentivizing extended reasoning before acknowledging incapability, thereby pushing the limits of LLM reasoning and aligning its behaviour with these limits. We compare Auto-CEI with various SOTA baselines across logical reasoning, mathematics, and planning tasks, where Auto-CEI achieves superior alignment by effectively balancing assertiveness and conservativeness. The code is available at <a href="https://github.com/SalesforceAIResearch/Auto-CEI" rel="external noopener nofollow" class="link-external link-https">this https URL</a>. 
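</p> <p class='mathjax'> The reward shaping described above can be caricatured in a few lines; the constants, rates, and curriculum schedule below are illustrative assumptions, not the paper's values: </p> <pre>
# Sketch: curriculum-adjusted reward for expert iteration (Auto-CEI style).
# Rewards assertive correct answers, penalises wrong ones, and pays a small
# reward for "I don't know" only after sufficiently long reasoning attempts.
def curriculum_reward(correct, refused, n_steps, min_steps):
    if refused:
        # Refusal earns reward only after enough reasoning effort; the
        # threshold `min_steps` is adjusted by the curriculum over rounds.
        return 0.2 if n_steps >= min_steps else -0.5
    return 1.0 if correct else -1.0

def update_curriculum(min_steps, refusal_rate, error_rate):
    # If the policy refuses too eagerly, raise the bar for refusals;
    # if it hallucinates too often, lower it (thresholds illustrative).
    if refusal_rate > 0.3:
        return min_steps + 1
    if error_rate > 0.3:
        return max(1, min_steps - 1)
    return min_steps
</pre> <p class='mathjax'>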
</p> </div> </dd> <dt> <a name='item25'>[25]</a> <a href ="/abs/2410.14420" title="Abstract" id="2410.14420"> arXiv:2410.14420 </a> (replaced) [<a href="/pdf/2410.14420" title="Download PDF" id="pdf-2410.14420" aria-labelledby="pdf-2410.14420">pdf</a>, <a href="https://arxiv.org/html/2410.14420v2" title="View HTML" id="html-2410.14420" aria-labelledby="html-2410.14420" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2410.14420" title="Other formats" id="oth-2410.14420" aria-labelledby="oth-2410.14420">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Asymptotic non-linear shrinkage and eigenvector overlap for weighted sample covariance </div> <div class='list-authors'><a href="https://arxiv.org/search/math?searchtype=author&query=Oriol,+B">Benoit Oriol</a></div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Statistics Theory (math.ST)</span>; Machine Learning (cs.LG); Probability (math.PR); Applications (stat.AP); Machine Learning (stat.ML) </div> <p class='mathjax'> We compute asymptotic non-linear shrinkage formulas for covariance and precision matrix estimators for weighted sample covariances, and the joint sample-population eigenvector overlap distribution, in the spirit of Ledoit and Péché. We detail explicitly the formulas for exponentially-weighted sample covariances. We propose an algorithm to numerically compute those formulas. Experimentally, we illustrate the performance of the asymptotic non-linear shrinkage estimators. Finally, we test the robustness of the theory to heavy-tailed distributions. </p> </div> </dd> <dt> <a name='item26'>[26]</a> <a href ="/abs/2410.16138" title="Abstract" id="2410.16138"> arXiv:2410.16138 </a> (replaced) [<a href="/pdf/2410.16138" title="Download PDF" id="pdf-2410.16138" aria-labelledby="pdf-2410.16138">pdf</a>, <a href="https://arxiv.org/html/2410.16138v2" title="View HTML" id="html-2410.16138" aria-labelledby="html-2410.16138" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2410.16138" title="Other formats" id="oth-2410.16138" aria-labelledby="oth-2410.16138">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Theoretical Insights into Line Graph Transformation on Graph Learning </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Yang,+F">Fan Yang</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Huang,+X">Xingyue Huang</a></div> <div class='list-comments mathjax'><span class='descriptor'>Comments:</span> 21 pages, code available at <a href="https://github.com/lukeyf/graphs-and-lines" rel="external noopener nofollow" class="link-external link-https">this https URL</a> </div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Machine Learning (cs.LG)</span>; Combinatorics (math.CO); Machine Learning (stat.ML) </div> <p class='mathjax'> Line graph transformation has been widely studied in graph theory, where each node in a line graph corresponds to an edge in the original graph. This has inspired a series of graph neural networks (GNNs) applied to transformed line graphs, which have proven effective in various graph representation learning tasks. However, there has been limited theoretical study on how line graph transformation affects the expressivity of GNN models. 
In this study, we focus on two types of graphs known to be challenging for the Weisfeiler-Leman (WL) tests: Cai-Fürer-Immerman (CFI) graphs and strongly regular graphs, and show that applying line graph transformation helps exclude these challenging graph properties, thus potentially assisting WL tests in distinguishing these graphs. We empirically validate our findings by conducting a series of experiments that compare the accuracy and efficiency of graph isomorphism tests and GNNs on both line-transformed and original graphs across these graph structure types. </p> </div> </dd> <dt> <a name='item27'>[27]</a> <a href ="/abs/2410.19464" title="Abstract" id="2410.19464"> arXiv:2410.19464 </a> (replaced) [<a href="/pdf/2410.19464" title="Download PDF" id="pdf-2410.19464" aria-labelledby="pdf-2410.19464">pdf</a>, <a href="https://arxiv.org/html/2410.19464v4" title="View HTML" id="html-2410.19464" aria-labelledby="html-2410.19464" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2410.19464" title="Other formats" id="oth-2410.19464" aria-labelledby="oth-2410.19464">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> LOCAL: Learning with Orientation Matrix to Infer Causal Structure from Time Series Data </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Zhang,+J">Jiajun Zhang</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Qiang,+B">Boyang Qiang</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Guo,+X">Xiaoyu Guo</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Xing,+W">Weiwei Xing</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Cheng,+Y">Yue Cheng</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Pedrycz,+W">Witold Pedrycz</a></div> <div class='list-comments mathjax'><span class='descriptor'>Comments:</span> 16 pages, 7 figures </div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Machine Learning (cs.LG)</span>; Artificial Intelligence (cs.AI); Machine Learning (stat.ML) </div> <p class='mathjax'> Discovering the underlying Directed Acyclic Graph (DAG) from time series observational data is highly challenging due to the dynamic nature and complex nonlinear interactions between variables. Existing methods typically search for the optimal DAG by optimizing an objective function but face scalability challenges, as their computational demands grow exponentially with the number of variables. To this end, we propose LOCAL, a highly efficient, easy-to-implement, and constraint-free method for recovering dynamic causal structures. LOCAL is the first attempt to formulate a quasi-maximum likelihood-based score function for learning a dynamic DAG equivalent to the ground truth. Building on this, we introduce two adaptive modules that enhance the algebraic characterization of acyclicity: Asymptotic Causal Mask Learning (ACML) and Dynamic Graph Parameter Learning (DGPL). ACML constructs causal masks using learnable priority vectors and the Gumbel-Sigmoid function, ensuring DAG formation while optimizing computational efficiency. DGPL transforms causal learning into decomposed matrix products, capturing dynamic causal structure in high-dimensional data and improving interpretability. 
Extensive experiments on synthetic and real-world datasets demonstrate that LOCAL significantly outperforms existing methods and highlight LOCAL's potential as a robust and efficient method for dynamic causal discovery. </p> </div> </dd> <dt> <a name='item28'>[28]</a> <a href ="/abs/2411.08390" title="Abstract" id="2411.08390"> arXiv:2411.08390 </a> (replaced) [<a href="/pdf/2411.08390" title="Download PDF" id="pdf-2411.08390" aria-labelledby="pdf-2411.08390">pdf</a>, <a href="https://arxiv.org/html/2411.08390v2" title="View HTML" id="html-2411.08390" aria-labelledby="html-2411.08390" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2411.08390" title="Other formats" id="oth-2411.08390" aria-labelledby="oth-2411.08390">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Expected Information Gain Estimation via Density Approximations: Sample Allocation and Dimension Reduction </div> <div class='list-authors'><a href="https://arxiv.org/search/stat?searchtype=author&query=Li,+F">Fengyi Li</a>, <a href="https://arxiv.org/search/stat?searchtype=author&query=Baptista,+R">Ricardo Baptista</a>, <a href="https://arxiv.org/search/stat?searchtype=author&query=Marzouk,+Y">Youssef Marzouk</a></div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Methodology (stat.ME)</span>; Computation (stat.CO); Machine Learning (stat.ML) </div> <p class='mathjax'> Computing expected information gain (EIG) from prior to posterior (equivalently, mutual information between candidate observations and model parameters or other quantities of interest) is a fundamental challenge in Bayesian optimal experimental design. We formulate flexible transport-based schemes for EIG estimation in general nonlinear/non-Gaussian settings, compatible with both standard and implicit Bayesian models. These schemes are representative of two-stage methods for estimating or bounding EIG using marginal and conditional density estimates. In this setting, we analyze the optimal allocation of samples between training (density estimation) and approximation of the outer prior expectation. We show that with this optimal sample allocation, the MSE of the resulting EIG estimator converges more quickly than that of a standard nested Monte Carlo scheme. We then address the estimation of EIG in high dimensions, by deriving gradient-based upper bounds on the mutual information lost by projecting the parameters and/or observations to lower-dimensional subspaces. Minimizing these upper bounds yields projectors and hence low-dimensional EIG approximations that outperform approximations obtained via other linear dimension reduction schemes. Numerical experiments on a PDE-constrained Bayesian inverse problem also illustrate a favorable trade-off between dimension truncation and the modeling of non-Gaussianity, when estimating EIG from finite samples in high dimensions. 
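</p> <p class='mathjax'> For orientation, the standard nested Monte Carlo EIG estimator that the transport-based schemes above are benchmarked against can be sketched directly; <code>sample_prior</code>, <code>simulate</code>, and <code>likelihood</code> are hypothetical callables for the Bayesian model, not part of the paper: </p> <pre>
# Sketch: nested Monte Carlo estimator of expected information gain,
# EIG = E_{theta,y}[ log p(y|theta) - log p(y) ], p(y) = E_theta'[ p(y|theta') ].
import numpy as np

def nested_mc_eig(sample_prior, simulate, likelihood, N=1000, M=1000, seed=0):
    rng = np.random.default_rng(seed)
    total = 0.0
    for _ in range(N):                     # outer expectation over (theta, y)
        theta = sample_prior(rng)
        y = simulate(theta, rng)
        # Inner Monte Carlo estimate of the marginal likelihood p(y).
        inner = np.mean([likelihood(y, sample_prior(rng)) for _ in range(M)])
        total += np.log(likelihood(y, theta)) - np.log(inner)
    return total / N
</pre> <p class='mathjax'>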
</p> </div> </dd> <dt> <a name='item29'>[29]</a> <a href ="/abs/2502.09986" title="Abstract" id="2502.09986"> arXiv:2502.09986 </a> (replaced) [<a href="/pdf/2502.09986" title="Download PDF" id="pdf-2502.09986" aria-labelledby="pdf-2502.09986">pdf</a>, <a href="https://arxiv.org/html/2502.09986v2" title="View HTML" id="html-2502.09986" aria-labelledby="html-2502.09986" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2502.09986" title="Other formats" id="oth-2502.09986" aria-labelledby="oth-2502.09986">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Statistical modeling of categorical trajectories with multivariate functional principal components </div> <div class='list-authors'><a href="https://arxiv.org/search/stat?searchtype=author&query=Cardot,+H">Hervé Cardot</a>, <a href="https://arxiv.org/search/stat?searchtype=author&query=Peltier,+C">Caroline Peltier</a></div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Methodology (stat.ME)</span>; Machine Learning (stat.ML) </div> <p class='mathjax'> There are many examples in which the statistical units of interest are samples of a continuous time categorical random process, that is to say a continuous time stochastic process taking values in a finite state space. Without losing any information, we associate to each state a binary random function, taking values in $\{0,1\}$, and turn the problem of statistical modeling of a categorical process into a multivariate functional data analysis issue. The (multivariate) covariance operator has nice interpretations in terms of departure from independence of the joint probabilities, and the multivariate functional principal components are simple to interpret. Under the weak hypothesis assuming only continuity in probability of the $0-1$ trajectories, it is simple to build consistent estimators of the covariance kernel and perform multivariate functional principal components analysis. Since the sample paths are piecewise constant, with a finite number of jumps, this is a rare case in functional data analysis in which the trajectories are not assumed to be continuous and can be observed exhaustively. The approach is illustrated on a data set of sensory perceptions, considering different gustometer-controlled stimuli experiments. We also show how it can be easily extended to analyze experiments, such as temporal check-all-that-apply, in which two or more states can be observed at the same time. 
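</p> <p class='mathjax'> The indicator-function construction lends itself to a direct sketch: observe each categorical trajectory on a common time grid, expand it into $0$-$1$ indicator paths (one per state), and perform multivariate functional PCA via an eigendecomposition of the empirical covariance of the concatenated paths. Grid-spacing quadrature weights are dropped for brevity, so this is a discretised approximation: </p> <pre>
# Sketch: multivariate functional PCA for categorical trajectories.
# X: (n_subjects, n_times) integer array of states in {0, ..., K-1},
# observed on a common time grid.
import numpy as np

def categorical_fpca(X, n_states, n_components=3):
    n, T = X.shape
    # 0-1 indicator paths: Z[i, t, k] = 1 iff subject i is in state k at t.
    Z = (X[..., None] == np.arange(n_states)).astype(float)
    Zc = Z.reshape(n, T * n_states)          # concatenate the K paths
    Zc = Zc - Zc.mean(axis=0)                # centre
    cov = (Zc.T @ Zc) / n                    # empirical covariance kernel
    vals, vecs = np.linalg.eigh(cov)
    order = np.argsort(vals)[::-1][:n_components]
    # Each component is a K-vector of functions on the time grid.
    return vals[order], vecs[:, order].reshape(T, n_states, n_components)
</pre> <p class='mathjax'>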
</p> </div> </dd> <dt> <a name='item30'>[30]</a> <a href ="/abs/2503.01924" title="Abstract" id="2503.01924"> arXiv:2503.01924 </a> (replaced) [<a href="/pdf/2503.01924" title="Download PDF" id="pdf-2503.01924" aria-labelledby="pdf-2503.01924">pdf</a>, <a href="https://arxiv.org/html/2503.01924v2" title="View HTML" id="html-2503.01924" aria-labelledby="html-2503.01924" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2503.01924" title="Other formats" id="oth-2503.01924" aria-labelledby="oth-2503.01924">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> TAET: Two-Stage Adversarial Equalization Training on Long-Tailed Distributions </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=YuHang,+W">Wang YuHang</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Guo,+J">Junkang Guo</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Liu,+A">Aolei Liu</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Wang,+K">Kaihao Wang</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Wu,+Z">Zaitong Wu</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Liu,+Z">Zhenyu Liu</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Yin,+W">Wenfei Yin</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Liu,+J">Jian Liu</a></div> <div class='list-comments mathjax'><span class='descriptor'>Comments:</span> 8 pages of main text and 5 pages of appendices; accepted by CVPR 2025 </div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Machine Learning (cs.LG)</span>; Artificial Intelligence (cs.AI); Machine Learning (stat.ML) </div> <p class='mathjax'> Adversarial robustness is a critical challenge in deploying deep neural networks for real-world applications. While adversarial training is a widely recognized defense strategy, most existing studies focus on balanced datasets, overlooking the prevalence of long-tailed distributions in real-world data, which significantly complicates robustness. This paper provides a comprehensive analysis of adversarial training under long-tailed distributions and identifies limitations in the current state-of-the-art method, AT-BSL, in achieving robust performance under such conditions. To address these challenges, we propose a novel training framework, TAET, which integrates an initial stabilization phase followed by a stratified equalization adversarial training phase. Additionally, prior work on long-tailed robustness has largely ignored the crucial evaluation metric of balanced accuracy. To bridge this gap, we introduce the concept of balanced robustness, a comprehensive metric tailored for assessing robustness under long-tailed distributions. Extensive experiments demonstrate that our method surpasses existing advanced defenses, achieving significant improvements in both memory and computational efficiency. This work represents a substantial advancement in addressing robustness challenges in real-world applications. Our code is available at: <a href="https://github.com/BuhuiOK/TAET-Two-Stage-Adversarial-Equalization-Training-on-Long-Tailed-Distributions" rel="external noopener nofollow" class="link-external link-https">this https URL</a>. 
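</p> <p class='mathjax'> In its simplest form, the balanced-robustness metric proposed above is class-averaged accuracy on adversarially perturbed inputs. A minimal sketch, with adversarial example generation abstracted behind a hypothetical <code>attack</code> callable (e.g. a PGD attack) and <code>model</code> returning class scores as a numpy array: </p> <pre>
# Sketch: balanced robust accuracy = mean over classes of per-class
# accuracy on adversarially perturbed inputs.
import numpy as np

def balanced_robust_accuracy(model, attack, X, y, n_classes):
    X_adv = attack(model, X, y)          # hypothetical attack API
    pred = model(X_adv).argmax(axis=1)   # predicted labels on perturbed inputs
    per_class = [np.mean(pred[y == c] == c)
                 for c in range(n_classes) if np.any(y == c)]
    return float(np.mean(per_class))
</pre> <p class='mathjax'>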
</p> </div> </dd> </dl> <div class='paging'>Total of 30 entries </div> </div> </div> </div> </main> </div> </body> </html>