Machine Learning

Showing new listings for Wednesday, 9 April 2025

Total of 35 entries

New submissions (showing 5 of 5 entries)

[1] arXiv:2504.05349 [pdf, html, other]
Title: Hyperflows: Pruning Reveals the Importance of Weights
Authors: Eugen Barbulescu, Antonio Alexoaie
Subjects: Machine Learning (stat.ML); Artificial Intelligence (cs.AI); Machine Learning (cs.LG)

Network pruning is used to reduce inference latency and power consumption in large neural networks. However, most existing methods struggle to accurately assess the importance of individual weights due to their inherent interrelatedness, leading to poor performance, especially at extreme sparsity levels. We introduce Hyperflows, a dynamic pruning approach that estimates each weight's importance by observing the network's gradient response to the weight's removal. A global pressure term continuously drives all weights toward pruning, and weights critical for accuracy are automatically regrown based on their flow, i.e., the aggregated gradient signal accumulated while they are absent. We explore the relationship between final sparsity and pressure, deriving power-law equations similar to those found in neural scaling laws. Empirically, we demonstrate state-of-the-art results with ResNet-50 and VGG-19 on CIFAR-10 and CIFAR-100.
[2] arXiv:2504.05426 [pdf, html, other]
Title: Survey on Algorithms for multi-index models
Authors: Joan Bruna, Daniel Hsu
Subjects: Machine Learning (stat.ML); Machine Learning (cs.LG); Methodology (stat.ME)

We review the literature on algorithms for estimating the index space in a multi-index model. The primary focus is on computationally efficient (polynomial-time) algorithms in Gaussian space, the assumptions under which consistency is guaranteed by these methods, and their sample complexity. In many cases, a gap is observed between the sample complexity of the best known computationally efficient methods and the information-theoretic minimum. We also review algorithms based on estimating the span of gradients using nonparametric methods, and algorithms based on fitting neural networks using gradient descent.
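The last sentence points at gradient-span methods. A toy version of that idea, estimating the index space of y = g(Ax) + noise from the average outer product of finite-difference gradients of a nonparametric fit, might look like the sketch below; the estimator, bandwidth, and step sizes are illustrative choices, not taken from the survey.

import numpy as np

rng = np.random.default_rng(0)
d, k, n = 10, 2, 1000
A = np.linalg.qr(rng.standard_normal((d, k)))[0]        # orthonormal basis of the true index space
X = rng.standard_normal((n, d))
y = np.tanh(X @ A[:, 0]) * (X @ A[:, 1]) + 0.05 * rng.standard_normal(n)

def nw(x, X, y, h=0.8):
    """Nadaraya-Watson estimate of E[y | x] with a Gaussian kernel."""
    w = np.exp(-np.sum((X - x) ** 2, axis=1) / (2 * h * h))
    return w @ y / w.sum()

# Average outer product of finite-difference gradients of the nonparametric fit.
M, eps = np.zeros((d, d)), 1e-2
for x in X[:200]:
    g = np.array([(nw(x + eps * e, X, y) - nw(x - eps * e, X, y)) / (2 * eps) for e in np.eye(d)])
    M += np.outer(g, g) / 200

est = np.linalg.eigh(M)[1][:, -k:]                      # top-k eigenvectors: estimated index space
# Singular values of est^T A are cosines of the principal angles (closer to 1 = better recovery).
print(np.linalg.svd(est.T @ A, compute_uv=False))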
[3] arXiv:2504.05643 [pdf, html, other]
Title: Improved Inference of Inverse Ising Problems under Missing Observations in Restricted Boltzmann Machines
Authors: Kaiji Sekimoto, Muneki Yasuda
Subjects: Machine Learning (stat.ML); Disordered Systems and Neural Networks (cond-mat.dis-nn); Machine Learning (cs.LG); Data Analysis, Statistics and Probability (physics.data-an)

Restricted Boltzmann machines (RBMs) are energy-based models analogous to the Ising model and are widely applied in statistical machine learning. The standard inverse Ising problem with a complete dataset requires computing both data and model expectations and is computationally challenging because evaluating model expectations involves a combinatorial explosion. Furthermore, in many applications, the available datasets are partially incomplete, making it difficult to compute even data expectations. In this study, we propose an approximation framework for these expectations in practical inverse Ising problems that integrates mean-field approximation or persistent contrastive divergence to generate refined initial points and spatial Monte Carlo integration to enhance estimator accuracy. We demonstrate that the proposed method effectively and accurately tunes the model parameters in comparison to the conventional method.

[4] arXiv:2504.05881 [pdf, html, other]
Title: Actuarial Learning for Pension Fund Mortality Forecasting
Authors: Eduardo Fraga L. de Melo, Helton Graziadei, Rodrigo Targino
Comments: 27 pages, 12 figures
Subjects: Machine Learning (stat.ML); Machine Learning (cs.LG)

For the assessment of the financial soundness of a pension fund, it is necessary to take mortality forecasting into account so that longevity risk is consistently incorporated into future cash flows. In this article, we employ machine learning models applied to actuarial science (actuarial learning) to make mortality predictions for a relevant sample of pension funds' participants. Actuarial learning is an emerging field that applies machine learning (ML) and artificial intelligence (AI) techniques in actuarial science. This encompasses the use of algorithms and computational models, such as regression trees, random forests, boosting, XGBoost, CatBoost, and neural networks (e.g., FNN, LSTM, and MHA), to analyze large sets of actuarial data. Our results indicate that some ML/AI algorithms present competitive out-of-sample performance when compared to the classical Lee-Carter model. This may indicate interesting alternatives for consistent liability evaluation and effective pension fund risk management.
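For context, the classical Lee-Carter baseline that the paper compares against can be fit in a few lines: average the log-mortality surface over time to get the age profile, take the leading SVD factor of the centered surface, and forecast the period index with a random walk with drift. The sketch below uses synthetic data and the usual identification constraints; it is only the baseline, not the paper's ML/AI models.

import numpy as np

rng = np.random.default_rng(1)
ages, years = np.arange(60, 100), np.arange(2000, 2020)
true_a = -9.0 + 0.09 * (ages - 60)                        # log-mortality level by age
true_b = np.linspace(0.03, 0.01, ages.size)               # age sensitivity to the period effect
true_k = -0.5 * (years - years.mean())                    # downward mortality trend
log_m = true_a[:, None] + true_b[:, None] * true_k[None, :] \
        + 0.01 * rng.standard_normal((ages.size, years.size))

a_x = log_m.mean(axis=1)                                  # age pattern
U, s, Vt = np.linalg.svd(log_m - a_x[:, None], full_matrices=False)
b_x = U[:, 0] / U[:, 0].sum()                             # usual identification: sum(b_x) = 1
k_t = s[0] * Vt[0] * U[:, 0].sum()                        # period index

drift = (k_t[-1] - k_t[0]) / (len(k_t) - 1)               # random walk with drift, one-step forecast
forecast_log_m = a_x + b_x * (k_t[-1] + drift)            # forecast log-mortality for the next year
print(np.round(forecast_log_m[:5], 2))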
[5] arXiv:2504.05892 [pdf, html, other]
Title: Matched Topological Subspace Detector
Authors: Chengen Liu, Victor M. Tenorio, Antonio G. Marques, Elvin Isufi
Subjects: Machine Learning (stat.ML); Signal Processing (eess.SP)

Topological spaces, represented by simplicial complexes, capture richer relationships than graphs by modeling interactions not only between nodes but also among higher-order entities, such as edges or triangles. This motivates the representation of information defined in irregular domains as topological signals. By leveraging the spectral dualities of Hodge and Dirac theory, practical topological signals often concentrate in specific spectral subspaces (e.g., gradient or curl). For instance, in a foreign currency exchange network, the exchange flow signals typically satisfy the arbitrage-free condition and hence are curl-free. However, the presence of anomalies can disrupt these conditions, causing the signals to deviate from such subspaces. In this work, we formulate a hypothesis testing framework to detect whether simplicial complex signals lie in specific subspaces in a principled and tractable manner. Concretely, we propose Neyman-Pearson matched topological subspace detectors for signals defined at a single simplicial level (such as edges) or jointly across all levels of a simplicial complex. The proposed (energy-based projection) detectors handle missing values, provide closed-form performance analysis, and effectively capture the unique topological properties of the data. We demonstrate the effectiveness of the proposed topological detectors on various real-world data, including foreign currency exchange networks.
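The core of an energy-based projection detector is simple to sketch: project the observed signal onto the orthogonal complement of the expected subspace and flag an anomaly when the residual energy exceeds a threshold. In the paper the subspace comes from the Hodge/Dirac spectral structure of a simplicial complex and the detectors also handle missing values; the toy version below uses an arbitrary orthonormal basis, complete observations, and an ad hoc threshold.

import numpy as np

rng = np.random.default_rng(2)
n, r, sigma = 50, 8, 0.1
U = np.linalg.qr(rng.standard_normal((n, r)))[0]          # orthonormal basis of the "expected" subspace

def detect(y, U, sigma, tau):
    """Declare an anomaly when too much energy escapes the subspace."""
    resid = y - U @ (U.T @ y)                              # projection onto the orthogonal complement
    stat = np.sum(resid ** 2) / sigma ** 2                 # ~ chi^2 with n - r dof under H0 (Gaussian noise)
    return stat > tau

y_clean = U @ rng.standard_normal(r) + sigma * rng.standard_normal(n)   # signal lying in the subspace
y_anom = y_clean + 0.8 * rng.standard_normal(n)                          # anomalous deviation
print(detect(y_clean, U, sigma, tau=80.0), detect(y_anom, U, sigma, tau=80.0))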
Cross submissions (showing 5 of 5 entries)

[6] arXiv:2504.05364 (cross-list from cs.SD) [pdf, other]
Title: Of All StrIPEs: Investigating Structure-informed Positional Encoding for Efficient Music Generation
Authors: Manvi Agarwal, Changhong Wang (LTCI), Gael Richard (S2A, IDS)
Subjects: Sound (cs.SD); Artificial Intelligence (cs.AI); Machine Learning (cs.LG); Machine Learning (stat.ML)

While music remains a challenging domain for generative models like Transformers, a two-pronged approach has recently proved successful: inserting musically-relevant structural information into the positional encoding (PE) module and using kernel approximation techniques based on Random Fourier Features (RFF) to lower the computational cost from quadratic to linear. Yet, it is not clear how such RFF-based efficient PEs compare with those based on rotation matrices, such as Rotary Positional Encoding (RoPE). In this paper, we present a unified framework based on kernel methods to analyze both families of efficient PEs. We use this framework to develop a novel PE method called RoPEPool, capable of extracting causal relationships from temporal sequences. Using RFF-based PEs and rotation-based PEs, we demonstrate how seemingly disparate PEs can be jointly studied by considering the content-context interactions they induce. For empirical validation, we use a symbolic music generation task, namely, melody harmonization. We show that RoPEPool, combined with highly-informative structural priors, outperforms all methods.
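For readers unfamiliar with rotary encodings, the standard RoPE operation that RoPEPool builds on rotates each pair of embedding dimensions by an angle proportional to the token position, so that query-key inner products depend only on the relative offset. The sketch below shows just that rotation and checks the relative-position property; RoPEPool's pooling and structural priors are not reproduced here.

import numpy as np

def rope(x, base=10000.0):
    """x: (seq_len, dim) with even dim; returns the position-rotated embeddings."""
    seq_len, dim = x.shape
    inv_freq = base ** (-np.arange(0, dim, 2) / dim)       # one frequency per dimension pair
    ang = np.outer(np.arange(seq_len), inv_freq)           # (seq_len, dim/2) rotation angles
    cos, sin = np.cos(ang), np.sin(ang)
    x1, x2 = x[:, 0::2], x[:, 1::2]
    out = np.empty_like(x)
    out[:, 0::2] = x1 * cos - x2 * sin
    out[:, 1::2] = x1 * sin + x2 * cos
    return out

a, b = np.random.default_rng(3).standard_normal((2, 64))

def at(v, pos, dim=64):
    """Place vector v at position pos and return its rotary-encoded version."""
    x = np.zeros((pos + 1, dim))
    x[pos] = v
    return rope(x)[pos]

# The inner product depends only on the relative offset (here 3), not the absolute positions.
print(np.allclose(at(a, 2) @ at(b, 5), at(a, 10) @ at(b, 13)))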
[7] arXiv:2504.05405 (cross-list from cs.LG) [pdf, other]
Title: The Role of Environment Access in Agnostic Reinforcement Learning
Authors: Akshay Krishnamurthy, Gene Li, Ayush Sekhari
Comments: comments welcome
Subjects: Machine Learning (cs.LG); Artificial Intelligence (cs.AI); Machine Learning (stat.ML)

We study Reinforcement Learning (RL) in environments with large state spaces, where function approximation is required for sample-efficient learning. Departing from a long history of prior work, we consider the weakest possible form of function approximation, called agnostic policy learning, where the learner seeks to find the best policy in a given class $\Pi$, with no guarantee that $\Pi$ contains an optimal policy for the underlying task. Although it is known that sample-efficient agnostic policy learning is not possible in the standard online RL setting without further assumptions, we investigate the extent to which this can be overcome with stronger forms of access to the environment. Specifically, we show that: 1. Agnostic policy learning remains statistically intractable when given access to a local simulator, from which one can reset to any previously seen state. This result holds even when the policy class is realizable, and stands in contrast to a positive result of [MFR24] showing that value-based learning under realizability is tractable with local simulator access. 2. Agnostic policy learning remains statistically intractable when given online access to a reset distribution with good coverage properties over the state space (the so-called $\mu$-reset setting). We also study stronger forms of function approximation for policy learning, showing that PSDP [BKSN03] and CPI [KL02] provably fail in the absence of policy completeness. 3. On a positive note, agnostic policy learning is statistically tractable for Block MDPs with access to both of the above reset models. We establish this via a new algorithm that carefully constructs a policy emulator: a tabular MDP with a small state space that approximates the value functions of all policies $\pi \in \Pi$. These values are approximated without any explicit value function class.
[8] arXiv:2504.05466 (cross-list from eess.SP) [pdf, other]
Title: A Solid-State Nanopore Signal Generator for Training Machine Learning Models
Authors: Jaise Johnson, Chinmayi R Galigekere, Manoj M Varma
Comments: Main text and supplementary information combined: 47 pages. Main text: 13 pages, 4 figures. Supplementary Information: 34 pages, 29 figures
Subjects: Signal Processing (eess.SP); Biological Physics (physics.bio-ph); Biomolecules (q-bio.BM); Machine Learning (stat.ML)

Translocation event detection from raw nanopore current signals is a fundamental step in nanopore signal analysis. Traditional data analysis methods rely on user-defined parameters to extract event information, making the interpretation of experimental results sensitive to parameter choice. While Machine Learning (ML) has seen widespread adoption across various scientific fields, its potential remains underexplored in solid-state nanopore research. In this work, we introduce a nanopore signal generator capable of producing extensive synthetic datasets for machine learning applications and benchmarking nanopore signal analysis platforms. Using this generator, we train deep learning models to detect translocation events directly from raw signals, achieving over 99% true event detection with minimal false positives.
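The generator idea can be illustrated with a toy trace: a constant open-pore current, randomly placed rectangular blockade events, and Gaussian noise, returned together with per-sample labels so a detector can be trained on it. Every parameter below (baseline current, event rate, dwell times, blockade depths) is invented for illustration and is not taken from the paper.

import numpy as np

def synth_trace(n=50_000, baseline=4.0, rate=1e-3, noise=0.05, rng=np.random.default_rng(4)):
    """Return a synthetic current trace and a 0/1 label per sample (1 = inside an event)."""
    signal = np.full(n, baseline)
    labels = np.zeros(n, dtype=int)
    t = 0
    while t < n:
        t += rng.geometric(rate)                   # waiting time to the next translocation event
        dwell = int(rng.integers(20, 400))         # event duration in samples
        depth = rng.uniform(0.2, 1.5)              # blockade amplitude
        signal[t:t + dwell] -= depth
        labels[t:t + dwell] = 1
        t += dwell
    return signal + noise * rng.standard_normal(n), labels

trace, labels = synth_trace()
print(trace.shape, labels.mean())                  # fraction of samples inside events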
[9] arXiv:2504.05695 (cross-list from cs.LG) [pdf, html, other]
Title: Architecture independent generalization bounds for overparametrized deep ReLU networks
Authors: Thomas Chen, Chun-Kai Kevin Chien, Patricia Muñoz Ewald, Andrew G. Moore
Comments: AMS Latex, 12 pages
Subjects: Machine Learning (cs.LG); Artificial Intelligence (cs.AI); Analysis of PDEs (math.AP); Optimization and Control (math.OC); Machine Learning (stat.ML)

We prove that overparametrized neural networks are able to generalize with a test error that is independent of the level of overparametrization, and independent of the Vapnik-Chervonenkis (VC) dimension. We prove explicit bounds that only depend on the metric geometry of the test and training sets, on the regularity properties of the activation function, and on the operator norms of the weights and norms of biases. For overparametrized deep ReLU networks with a training sample size bounded by the input space dimension, we explicitly construct zero loss minimizers without use of gradient descent, and prove that the generalization error is independent of the network architecture.

[10] arXiv:2504.06250 (cross-list from math.PR) [pdf, html, other]
Title: Fractal and Regular Geometry of Deep Neural Networks
Authors: Simmaco Di Lillo, Domenico Marinucci, Michele Salvi, Stefano Vigogna
Subjects: Probability (math.PR); Machine Learning (cs.LG); Machine Learning (stat.ML)

We study the geometric properties of random neural networks by investigating the boundary volumes of their excursion sets for different activation functions, as the depth increases. More specifically, we show that, for activations which are not very regular (e.g., the Heaviside step function), the boundary volumes exhibit fractal behavior, with their Hausdorff dimension monotonically increasing with the depth. On the other hand, for activations which are more regular (e.g., ReLU, logistic and $\tanh$), as the depth increases, the expected boundary volumes can either converge to zero, remain constant or diverge exponentially, depending on a single spectral parameter which can be easily computed. Our theoretical results are confirmed in some numerical experiments based on Monte Carlo simulations.
Replacement submissions (showing 25 of 25 entries)

[11] arXiv:2010.12059 (replaced) [pdf, html, other]
Title: Principled Interpolation in Normalizing Flows
Authors: Samuel G. Fadel, Sebastian Mair, Ricardo da S. Torres, Ulf Brefeld
Comments: 20 pages, 11 figures, accepted at the European Conference on Machine Learning and Principles and Practice of Knowledge Discovery in Databases (ECML PKDD 2021)
Subjects: Machine Learning (stat.ML); Machine Learning (cs.LG)

Generative models based on normalizing flows are very successful in modeling complex data distributions using simpler ones. However, straightforward linear interpolations show unexpected side effects, as interpolation paths lie outside the area where samples are observed. This is caused by the standard choice of Gaussian base distributions and can be seen in the norms of the interpolated samples as they are outside the data manifold. This observation suggests that changing the way of interpolating should generally result in better interpolations, but it is not clear how to do that in an unambiguous way. In this paper, we solve this issue by enforcing a specific manifold and, hence, change the base distribution, to allow for a principled way of interpolation. Specifically, we use the Dirichlet and von Mises-Fisher base distributions on the probability simplex and the hypersphere, respectively. Our experimental results show superior performance in terms of bits per dimension, Fréchet Inception Distance (FID), and Kernel Inception Distance (KID) scores for interpolation, while maintaining the generative performance.
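The norm problem described above is easy to reproduce: in a high-dimensional Gaussian base space, the midpoint of a linear interpolation between two samples has norm around sqrt(d/2), well below the typical sqrt(d) shell where the flow saw data. The sketch below contrasts linear with spherical interpolation to make that visible; note that the paper's actual remedy is to change the base distribution (Dirichlet or von Mises-Fisher), not to patch the interpolation rule afterwards.

import numpy as np

rng = np.random.default_rng(5)
d = 512
z0, z1 = rng.standard_normal(d), rng.standard_normal(d)   # two base samples, norm ~ sqrt(d)

def lerp(t):                                               # straight linear interpolation
    return (1 - t) * z0 + t * z1

def slerp(t):                                              # spherical interpolation along the great circle
    w = np.arccos(z0 @ z1 / (np.linalg.norm(z0) * np.linalg.norm(z1)))
    return (np.sin((1 - t) * w) * z0 + np.sin(t * w) * z1) / np.sin(w)

for t in (0.0, 0.5, 1.0):
    print(t, round(np.linalg.norm(lerp(t)), 1), round(np.linalg.norm(slerp(t)), 1))
# The lerp midpoint drops to roughly sqrt(d/2) ~ 16, off the typical shell at sqrt(d) ~ 22.6,
# while slerp keeps the norm near sqrt(d), so intermediate points stay where the model saw data.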
[12] arXiv:2303.01353 (replaced) [pdf, html, other]
Title: Penalising the biases in norm regularisation enforces sparsity
Authors: Etienne Boursier, Nicolas Flammarion
Comments: Corrected a mistake in the previous version of Theorem 4 (appendix)
Subjects: Machine Learning (stat.ML); Machine Learning (cs.LG)

Controlling the parameters' norm often yields good generalisation when training neural networks. Beyond simple intuitions, the relation between regularising parameters' norm and obtained estimators remains theoretically misunderstood. For one hidden ReLU layer networks with unidimensional data, this work shows the parameters' norm required to represent a function is given by the total variation of its second derivative, weighted by a $\sqrt{1+x^2}$ factor. Notably, this weighting factor disappears when the norm of bias terms is not regularised. The presence of this additional weighting factor is of utmost significance as it is shown to enforce the uniqueness and sparsity (in the number of kinks) of the minimal norm interpolator. Conversely, omitting the bias' norm allows for non-sparse solutions. Penalising the bias terms in the regularisation, either explicitly or implicitly, thus leads to sparse estimators.
[13] arXiv:2306.11908 (replaced) [pdf, html, other]
Title: Generalized Random Forests using Fixed-Point Trees
Authors: David Fleischer, David A. Stephens, Archer Yang
Comments: 34 pages, 26 figures
Subjects: Machine Learning (stat.ML); Machine Learning (cs.LG); Methodology (stat.ME)

We propose a computationally efficient alternative to generalized random forests (GRFs, arXiv:1610.01271) for estimating heterogeneous effects in large dimensions. While GRFs rely on a gradient-based splitting criterion, which in large dimensions is computationally expensive and unstable, our method introduces a fixed-point approximation that eliminates the need for Jacobian estimation. This gradient-free approach preserves GRFs' theoretical guarantees of consistency and asymptotic normality while significantly improving computational efficiency. We demonstrate that our method achieves a several-fold speedup over standard GRFs without compromising statistical accuracy. Experiments on both simulated and real-world data validate our approach. Our findings suggest that the proposed method is a scalable alternative for localized effect estimation in machine learning and causal inference applications.

[14] arXiv:2405.05733 (replaced) [pdf, html, other]
Title: Batched Stochastic Bandit for Nondegenerate Functions
Authors: Yu Liu, Yunlu Shu, Tianyu Wang
Comments: 34 pages, 14 colored figures
Subjects: Machine Learning (stat.ML); Machine Learning (cs.LG)

This paper studies batched bandit learning problems for nondegenerate functions. We introduce an algorithm that solves the batched bandit problem for nondegenerate functions near-optimally. More specifically, we introduce an algorithm, called Geometric Narrowing (GN), whose regret bound is of order $\widetilde{\mathcal{O}} ( A_{+}^d \sqrt{T} )$. In addition, GN only needs $\mathcal{O} (\log \log T)$ batches to achieve this regret. We also provide lower bound analysis for this problem. More specifically, we prove that over some (compact) doubling metric space of doubling dimension $d$: 1. For any policy $\pi$, there exists a problem instance on which $\pi$ admits a regret of order ${\Omega} ( A_-^d \sqrt{T})$; 2. No policy can achieve a regret of order $ A_-^d \sqrt{T} $ over all problem instances, using less than $ \Omega ( \log \log T ) $ rounds of communications. Our lower bound analysis shows that the GN algorithm achieves near-optimal regret with a minimal number of batches.
[15] arXiv:2405.18220 (replaced) [pdf, html, other]
Title: Non-negative Tensor Mixture Learning for Discrete Density Estimation
Authors: Kazu Ghalamkari, Jesper Løve Hinrich, Morten Mørup
Comments: 31 pages, 7 figures
Subjects: Machine Learning (stat.ML); Machine Learning (cs.LG)

We present an expectation-maximization (EM) based unified framework for non-negative tensor decomposition that optimizes the Kullback-Leibler divergence. To avoid iterations in each M-step and learning rate tuning, we establish a general relationship between low-rank decompositions and many-body approximations. Using this connection, we exploit that the closed-form solution of the many-body approximation updates all parameters simultaneously in the M-step. Our framework offers not only a unified methodology for a variety of low-rank structures, including CP, Tucker, and Tensor Train decompositions, but also their mixtures. Notably, the weights of each low-rank tensor in the mixture can be learned from the data, which enables us to leverage the advantage of different low-rank structures without careful selection of the structure in advance. We empirically demonstrate that our framework overall provides superior generalization in terms of discrete density estimation and classification when compared to conventional tensor-based approaches.
[16] arXiv:2410.05454 (replaced) [pdf, html, other]
Title: Meta-Dynamical State Space Models for Integrative Neural Data Analysis
Authors: Ayesha Vermani, Josue Nassar, Hyungju Jeon, Matthew Dowling, Il Memming Park
Subjects: Machine Learning (stat.ML); Machine Learning (cs.LG); Neurons and Cognition (q-bio.NC)

Learning shared structure across environments facilitates rapid learning and adaptive behavior in neural systems. This has been widely demonstrated and applied in machine learning to train models that are capable of generalizing to novel settings. However, there has been limited work exploiting the shared structure in neural activity during similar tasks for learning latent dynamics from neural recordings. Existing approaches are designed to infer dynamics from a single dataset and cannot be readily adapted to account for statistical heterogeneities across recordings. In this work, we hypothesize that similar tasks admit a corresponding family of related solutions and propose a novel approach for meta-learning this solution space from task-related neural activity of trained animals. Specifically, we capture the variabilities across recordings on a low-dimensional manifold which concisely parametrizes this family of dynamics, thereby facilitating rapid learning of latent dynamics given new recordings. We demonstrate the efficacy of our approach on few-shot reconstruction and forecasting of synthetic dynamical systems, and neural recordings from the motor cortex during different arm reaching tasks.
[17] arXiv:2410.09697 (replaced) [pdf, html, other]
Title: Provable Convergence and Limitations of Geometric Tempering for Langevin Dynamics
Authors: Omar Chehab, Anna Korba, Austin Stromme, Adrien Vacher
Subjects: Machine Learning (stat.ML); Machine Learning (cs.LG); Computation (stat.CO)

Geometric tempering is a popular approach to sampling from challenging multi-modal probability distributions by instead sampling from a sequence of distributions which interpolate, using the geometric mean, between an easier proposal distribution and the target distribution. In this paper, we theoretically investigate the soundness of this approach when the sampling algorithm is Langevin dynamics, proving both upper and lower bounds. Our upper bounds are the first analysis in the literature under functional inequalities. They assert the convergence of tempered Langevin in continuous and discrete time, and their minimization leads to closed-form optimal tempering schedules for some pairs of proposal and target distributions. Our lower bounds demonstrate a simple case where the geometric tempering takes exponential time, and further reveal that the geometric tempering can suffer from poor functional inequalities and slow convergence, even when the target distribution is well-conditioned. Overall, our results indicate that geometric tempering may not help, and can even be harmful for convergence.
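The object studied here is short to write down: Langevin dynamics run on the interpolated potential (1 - beta) U0 + beta U1, with U0 the proposal's potential, U1 the target's, and beta ramped from 0 to 1 (the geometric mean of the densities). The 1-D sketch below, with an unadjusted Langevin step, a linear schedule, and an arbitrary step size, is only meant to make the setup concrete; the paper's point is precisely that such schedules can converge slowly or fail.

import numpy as np

rng = np.random.default_rng(6)

def grad_U0(x):                     # proposal: standard Gaussian, U0(x) = x^2 / 2
    return x

def grad_U1(x):                     # target: double well, U1(x) = (x^2 - 4)^2 / 4, modes near +/- 2
    return x * (x ** 2 - 4)

x = rng.standard_normal(2000)       # a population of chains started from the proposal
h = 1e-3
for beta in np.linspace(0.0, 1.0, 500):
    grad = (1 - beta) * grad_U0(x) + beta * grad_U1(x)     # geometric-mean (tempered) potential
    x = x - h * grad + np.sqrt(2 * h) * rng.standard_normal(x.shape)

print((x > 0).mean())               # roughly 0.5 when both modes are reached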
[18] arXiv:2410.23602 (replaced) [pdf, html, other]
Title: Linearized Wasserstein Barycenters: Synthesis, Analysis, Representational Capacity, and Applications
Authors: Matthew Werenski, Brendan Mallery, Shuchin Aeron, James M. Murphy
Comments: 40 pages, 6 figures. Minor revisions and proof fixes, accepted to AISTATS 2025
Subjects: Machine Learning (stat.ML); Machine Learning (cs.LG)

We propose the linear barycentric coding model (LBCM) which utilizes the linear optimal transport (LOT) metric for analysis and synthesis of probability measures. We provide a closed-form solution to the variational problem characterizing the probability measures in the LBCM and establish equivalence of the LBCM to the set of 2-Wasserstein barycenters in the special case of compatible measures. Computational methods for synthesizing and analyzing measures in the LBCM are developed with finite sample guarantees. One of our main theoretical contributions is to identify an LBCM, expressed in terms of a simple family, which is sufficient to express all probability measures on the closed unit interval. We show that a natural analogous construction of an LBCM in 2 dimensions fails, and we leave it as an open problem to identify the proper extension in more than 1 dimension. We conclude by demonstrating the utility of LBCM for covariance estimation and data imputation.
[19] arXiv:2504.05004 (replaced) [pdf, html, other]
Title: Stacking Variational Bayesian Monte Carlo
Authors: Francesco Silvestrin, Chengkun Li, Luigi Acerbi
Comments: Accepted at the Workshop track of the 7th Symposium in Advances in Approximate Bayesian Inference (AABI 2025). 24 pages, 9 figures
Subjects: Machine Learning (stat.ML); Machine Learning (cs.LG)

Variational Bayesian Monte Carlo (VBMC) is a sample-efficient method for approximate Bayesian inference with computationally expensive likelihoods. While VBMC's local surrogate approach provides stable approximations, its conservative exploration strategy and limited evaluation budget can cause it to miss regions of complex posteriors. In this work, we introduce Stacking Variational Bayesian Monte Carlo (S-VBMC), a method that constructs global posterior approximations by merging independent VBMC runs through a principled and inexpensive post-processing step. Our approach leverages VBMC's mixture posterior representation and per-component evidence estimates, requiring no additional likelihood evaluations while being naturally parallelizable. We demonstrate S-VBMC's effectiveness on two synthetic problems designed to challenge VBMC's exploration capabilities and two real-world applications from computational neuroscience, showing substantial improvements in posterior approximation quality across all cases.
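Roughly, the post-processing step pools the mixture components of several independent runs and reweights them using the per-component evidence estimates, with no new likelihood evaluations. The toy sketch below shows that general shape with made-up one-dimensional components and a simple evidence-proportional weighting; the exact weighting rule used by S-VBMC may differ.

import numpy as np

# Each run: a list of (mean, std, log_evidence) for its mixture components (toy 1-D numbers).
run_a = [(-1.0, 0.3, -10.2), (-0.4, 0.5, -11.0)]
run_b = [(1.2, 0.4, -10.5), (0.6, 0.6, -12.3)]

comps = run_a + run_b
log_z = np.array([c[2] for c in comps])
w = np.exp(log_z - log_z.max())
w /= w.sum()                                        # merged mixture weights from the evidence estimates

def merged_pdf(x):
    """Density of the stacked mixture posterior; no likelihood evaluations are needed."""
    pdfs = [np.exp(-0.5 * ((x - m) / s) ** 2) / (s * np.sqrt(2 * np.pi)) for m, s, _ in comps]
    return sum(wi * p for wi, p in zip(w, pdfs))

xs = np.linspace(-3, 3, 7)
print(np.round(merged_pdf(xs), 3))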
[20] arXiv:2301.06650 (replaced) [pdf, html, other]
Title: Probabilistic Traffic Forecasting with Dynamic Regression
Authors: Vincent Zhihao Zheng, Seongjin Choi, Lijun Sun
Journal-ref: Probabilistic Traffic Forecasting with Dynamic Regression. Transportation Science (2025)
Subjects: Machine Learning (cs.LG); Machine Learning (stat.ML)

This paper proposes a dynamic regression (DR) framework that enhances existing deep spatiotemporal models by incorporating structured learning for the error process in traffic forecasting. The framework relaxes the assumption of time independence by modeling the error series of the base model (i.e., a well-established traffic forecasting model) using a matrix-variate autoregressive (AR) model. The AR model is integrated into training by redesigning the loss function. The newly designed loss function is based on the likelihood of a non-isotropic error term, enabling the model to generate probabilistic forecasts while preserving the original outputs of the base model. Importantly, the additional parameters introduced by the DR framework can be jointly optimized alongside the base model. Evaluation on state-of-the-art (SOTA) traffic forecasting models using speed and flow datasets demonstrates improved performance, with interpretable AR coefficients and spatiotemporal covariance matrices enhancing the understanding of the model.
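The mechanism can be sketched in one dimension: keep the base forecaster, model its error series with an AR process, and train on the AR innovations rather than the raw residuals, so the AR coefficient is optimized jointly with the base model. The paper uses a matrix-variate AR with a full non-isotropic covariance; the scalar AR(1) stand-in below only shows how the loss is redesigned, with a toy series and a linear base model as placeholders.

import torch

torch.manual_seed(0)
T = 500
t = torch.arange(T, dtype=torch.float32)
y = torch.sin(0.1 * t) + 0.05 * torch.randn(T).cumsum(0)   # series with temporally correlated noise

base = torch.nn.Linear(1, 1)                       # stand-in for a deep spatiotemporal base model
rho = torch.nn.Parameter(torch.zeros(()))          # AR(1) coefficient of the error process
opt = torch.optim.Adam(list(base.parameters()) + [rho], lr=1e-2)

for _ in range(300):
    pred = base(torch.sin(0.1 * t).unsqueeze(1)).squeeze(1)   # base model forecast
    err = y - pred
    innov = err[1:] - rho * err[:-1]               # AR(1) innovations should be (nearly) white
    loss = (innov ** 2).mean()                     # Gaussian innovation likelihood, up to constants
    opt.zero_grad()
    loss.backward()
    opt.step()

print(float(rho))                                  # learned autocorrelation of the error process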
Fadel</a></div> <div class='list-comments mathjax'><span class='descriptor'>Comments:</span> 13 pages, 7 figures, accepted at Intelligent Data Analysis (IDA 2024) </div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Machine Learning (cs.LG)</span>; Computer Vision and Pattern Recognition (cs.CV); Machine Learning (stat.ML) </div> <p class='mathjax'> In contrast to fully-supervised models, self-supervised representation learning only needs a fraction of data to be labeled and often achieves the same or even higher downstream performance. The goal is to pre-train deep neural networks on a self-supervised task, making them able to extract meaningful features from raw input data afterwards. Previously, autoencoders and Siamese networks have been successfully employed as feature extractors for tasks such as image classification. However, both have their individual shortcomings and benefits. In this paper, we combine their complementary strengths by proposing a new method called SidAE (Siamese denoising autoencoder). Using an image classification downstream task, we show that our model outperforms two self-supervised baselines across multiple data sets and scenarios. Crucially, this includes conditions in which only a small amount of labeled data is available. Empirically, the Siamese component has more impact, but the denoising autoencoder is nevertheless necessary to improve performance. </p> </div> </dd> <dt> <a name='item22'>[22]</a> <a href ="/abs/2305.15203" title="Abstract" id="2305.15203"> arXiv:2305.15203 </a> (replaced) [<a href="/pdf/2305.15203" title="Download PDF" id="pdf-2305.15203" aria-labelledby="pdf-2305.15203">pdf</a>, <a href="https://arxiv.org/html/2305.15203v3" title="View HTML" id="html-2305.15203" aria-labelledby="html-2305.15203" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2305.15203" title="Other formats" id="oth-2305.15203" aria-labelledby="oth-2305.15203">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Frequency maps reveal the correlation between Adversarial Attacks and Implicit Bias </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&amp;query=Basile,+L">Lorenzo Basile</a>, <a href="https://arxiv.org/search/cs?searchtype=author&amp;query=Karantzas,+N">Nikos Karantzas</a>, <a href="https://arxiv.org/search/cs?searchtype=author&amp;query=d&#39;Onofrio,+A">Alberto d&#39;Onofrio</a>, <a href="https://arxiv.org/search/cs?searchtype=author&amp;query=Manzoni,+L">Luca Manzoni</a>, <a href="https://arxiv.org/search/cs?searchtype=author&amp;query=Bortolussi,+L">Luca Bortolussi</a>, <a href="https://arxiv.org/search/cs?searchtype=author&amp;query=Rodriguez,+A">Alex Rodriguez</a>, <a href="https://arxiv.org/search/cs?searchtype=author&amp;query=Anselmi,+F">Fabio Anselmi</a></div> <div class='list-comments mathjax'><span class='descriptor'>Comments:</span> Accepted at IJCNN 2025 </div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Machine Learning (cs.LG)</span>; Artificial Intelligence (cs.AI); Cryptography and Security (cs.CR); Machine Learning (stat.ML) </div> <p class='mathjax'> Despite their impressive performance in classification tasks, neural networks are known to be vulnerable to adversarial attacks, subtle perturbations of the input data designed to deceive the model. 
In this work, we investigate the correlation between these perturbations and the implicit bias of neural networks trained with gradient-based algorithms. To this end, we analyse a representation of the network&#39;s implicit bias through the lens of the Fourier transform. Specifically, we identify unique fingerprints of implicit bias and adversarial attacks by calculating the minimal, essential frequencies needed for accurate classification of each image, as well as the frequencies that drive misclassification in its adversarially perturbed counterpart. This approach enables us to uncover and analyse the correlation between these essential frequencies, providing a precise map of how the network&#39;s biases align or contrast with the frequency components exploited by adversarial attacks. For this analysis, among other methods, we use a newly introduced technique capable of detecting nonlinear correlations between high-dimensional datasets. Our results provide empirical evidence that the network bias in Fourier space and the target frequencies of adversarial attacks are highly correlated and suggest new potential strategies for adversarial defence. </p> </div> </dd> <dt> <a name='item23'>[23]</a> <a href ="/abs/2310.09702" title="Abstract" id="2310.09702"> arXiv:2310.09702 </a> (replaced) [<a href="/pdf/2310.09702" title="Download PDF" id="pdf-2310.09702" aria-labelledby="pdf-2310.09702">pdf</a>, <a href="/format/2310.09702" title="Other formats" id="oth-2310.09702" aria-labelledby="oth-2310.09702">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Inference with Mondrian Random Forests </div> <div class='list-authors'><a href="https://arxiv.org/search/math?searchtype=author&amp;query=Cattaneo,+M+D">Matias D. Cattaneo</a>, <a href="https://arxiv.org/search/math?searchtype=author&amp;query=Klusowski,+J+M">Jason M. Klusowski</a>, <a href="https://arxiv.org/search/math?searchtype=author&amp;query=Underwood,+W+G">William G. Underwood</a></div> <div class='list-comments mathjax'><span class='descriptor'>Comments:</span> 64 pages, 1 figure, 6 tables </div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Statistics Theory (math.ST)</span>; Methodology (stat.ME); Machine Learning (stat.ML) </div> <p class='mathjax'> Random forests are popular methods for regression and classification analysis, and many different variants have been proposed in recent years. One interesting example is the Mondrian random forest, in which the underlying constituent trees are constructed via a Mondrian process. We give precise bias and variance characterizations, along with a Berry-Esseen-type central limit theorem, for the Mondrian random forest regression estimator. By combining these results with a carefully crafted debiasing approach and an accurate variance estimator, we present valid statistical inference methods for the unknown regression function. These methods come with explicitly characterized error bounds in terms of the sample size, tree complexity parameter, and number of trees in the forest, and include coverage error rates for feasible confidence interval estimators. Our novel debiasing procedure for the Mondrian random forest also allows it to achieve the minimax-optimal point estimation convergence rate in mean squared error for multivariate $\beta$-Hölder regression functions, for all $\beta &gt; 0$, provided that the underlying tuning parameters are chosen appropriately.
Efficient and implementable algorithms are devised for both batch and online learning settings, and we study the computational complexity of different Mondrian random forest implementations. Finally, simulations with synthetic data validate our theory and methodology, demonstrating their excellent finite-sample properties. </p> </div> </dd> <dt> <a name='item24'>[24]</a> <a href ="/abs/2312.10431" title="Abstract" id="2312.10431"> arXiv:2312.10431 </a> (replaced) [<a href="/pdf/2312.10431" title="Download PDF" id="pdf-2312.10431" aria-labelledby="pdf-2312.10431">pdf</a>, <a href="https://arxiv.org/html/2312.10431v5" title="View HTML" id="html-2312.10431" aria-labelledby="html-2312.10431" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2312.10431" title="Other formats" id="oth-2312.10431" aria-labelledby="oth-2312.10431">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Continuous Diffusion for Mixed-Type Tabular Data </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&amp;query=Mueller,+M">Markus Mueller</a>, <a href="https://arxiv.org/search/cs?searchtype=author&amp;query=Gruber,+K">Kathrin Gruber</a>, <a href="https://arxiv.org/search/cs?searchtype=author&amp;query=Fok,+D">Dennis Fok</a></div> <div class='list-comments mathjax'><span class='descriptor'>Comments:</span> published at ICLR 2025 </div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Machine Learning (cs.LG)</span>; Machine Learning (stat.ML) </div> <p class='mathjax'> Score-based generative models, commonly referred to as diffusion models, have proven to be successful at generating text and image data. However, their adaptation to mixed-type tabular data remains underexplored. In this work, we propose CDTD, a Continuous Diffusion model for mixed-type Tabular Data. CDTD is based on a novel combination of score matching and score interpolation to enforce a unified continuous noise distribution for both continuous and categorical features. We explicitly acknowledge the necessity of homogenizing distinct data types by relying on model-specific loss calibration and initialization schemes. To further address the high heterogeneity in mixed-type tabular data, we introduce adaptive feature- or type-specific noise schedules. These ensure balanced generative performance across features and optimize the allocation of model capacity across features and diffusion time. Our experimental results show that CDTD consistently outperforms state-of-the-art benchmark models, captures feature correlations exceptionally well, and that heterogeneity in the noise schedule design boosts sample quality. Replication code is available at <a href="https://github.com/muellermarkus/cdtd" rel="external noopener nofollow" class="link-external link-https">this https URL</a>.
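</p> <p class='mathjax'> A loose illustration of the feature-specific noise-schedule idea only (CDTD's loss calibration and score interpolation are more involved): each feature, continuous or embedded categorical, gets its own log-linear noise level as a function of diffusion time. The particular sigma ranges below are made up. </p> <pre>
import numpy as np

def feature_noise_schedule(t, sigma_min, sigma_max):
    # Log-linear (geometric) interpolation between sigma_min at t=0 and
    # sigma_max at t=1, evaluated elementwise so every feature has its own
    # noise level -- a crude stand-in for adaptive, type-specific schedules.
    return sigma_min * (sigma_max / sigma_min) ** t

# toy usage: 3 continuous features and 2 embedded categorical features
sigma_min = np.array([0.002, 0.002, 0.002, 0.01, 0.01])
sigma_max = np.array([80.0, 80.0, 80.0, 5.0, 5.0])
print(feature_noise_schedule(0.5, sigma_min, sigma_max))
</pre> <p class='mathjax'>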
</p> </div> </dd> <dt> <a name='item25'>[25]</a> <a href ="/abs/2405.20769" title="Abstract" id="2405.20769"> arXiv:2405.20769 </a> (replaced) [<a href="/pdf/2405.20769" title="Download PDF" id="pdf-2405.20769" aria-labelledby="pdf-2405.20769">pdf</a>, <a href="https://arxiv.org/html/2405.20769v2" title="View HTML" id="html-2405.20769" aria-labelledby="html-2405.20769" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2405.20769" title="Other formats" id="oth-2405.20769" aria-labelledby="oth-2405.20769">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Avoiding Pitfalls for Privacy Accounting of Subsampled Mechanisms under Composition </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&amp;query=Lebeda,+C+J">Christian Janos Lebeda</a>, <a href="https://arxiv.org/search/cs?searchtype=author&amp;query=Regehr,+M">Matthew Regehr</a>, <a href="https://arxiv.org/search/cs?searchtype=author&amp;query=Kamath,+G">Gautam Kamath</a>, <a href="https://arxiv.org/search/cs?searchtype=author&amp;query=Steinke,+T">Thomas Steinke</a></div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Cryptography and Security (cs.CR)</span>; Data Structures and Algorithms (cs.DS); Machine Learning (cs.LG); Machine Learning (stat.ML) </div> <p class='mathjax'> We consider the problem of computing tight privacy guarantees for the composition of subsampled differentially private mechanisms. Recent algorithms can numerically compute the privacy parameters to arbitrary precision but must be carefully applied. <br>Our main contribution is to address two common points of confusion. First, some privacy accountants assume that the privacy guarantees for the composition of a subsampled mechanism are determined by self-composing the worst-case datasets for the uncomposed mechanism. We show that this is not true in general. Second, Poisson subsampling is sometimes assumed to have similar privacy guarantees compared to sampling without replacement. We show that the privacy guarantees may in fact differ significantly between the two sampling schemes. In particular, we give an example of hyperparameters that result in $\varepsilon \approx 1$ for Poisson subsampling and $\varepsilon &gt; 10$ for sampling without replacement. This occurs for some parameters that could realistically be chosen for DP-SGD. 
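</p> <p class='mathjax'> The two subsampling schemes being contrasted can be made concrete with a small sketch; this shows only the sampling step, not the privacy accounting, and the dataset and batch sizes are arbitrary choices for illustration. </p> <pre>
import numpy as np

rng = np.random.default_rng(0)
n, expected_batch = 10_000, 100
q = expected_batch / n                      # Poisson inclusion probability

def poisson_batch():
    # every record is included independently with probability q, so the
    # realised batch size is Binomial(n, q) and varies from step to step
    mask = rng.binomial(1, q, size=n).astype(bool)
    return np.flatnonzero(mask)

def without_replacement_batch():
    # a fixed-size batch drawn uniformly without replacement
    return rng.choice(n, size=expected_batch, replace=False)

# Both schemes match in expected batch size, yet, as the abstract notes,
# their composed privacy guarantees can differ dramatically for DP-SGD.
print(len(poisson_batch()), len(without_replacement_batch()))
</pre> <p class='mathjax'>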
</p> </div> </dd> <dt> <a name='item26'>[26]</a> <a href ="/abs/2410.12779" title="Abstract" id="2410.12779"> arXiv:2410.12779 </a> (replaced) [<a href="/pdf/2410.12779" title="Download PDF" id="pdf-2410.12779" aria-labelledby="pdf-2410.12779">pdf</a>, <a href="/format/2410.12779" title="Other formats" id="oth-2410.12779" aria-labelledby="oth-2410.12779">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Geometry-Aware Generative Autoencoders for Warped Riemannian Metric Learning and Generative Modeling on Data Manifolds </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&amp;query=Sun,+X">Xingzhi Sun</a>, <a href="https://arxiv.org/search/cs?searchtype=author&amp;query=Liao,+D">Danqi Liao</a>, <a href="https://arxiv.org/search/cs?searchtype=author&amp;query=MacDonald,+K">Kincaid MacDonald</a>, <a href="https://arxiv.org/search/cs?searchtype=author&amp;query=Zhang,+Y">Yanlei Zhang</a>, <a href="https://arxiv.org/search/cs?searchtype=author&amp;query=Liu,+C">Chen Liu</a>, <a href="https://arxiv.org/search/cs?searchtype=author&amp;query=Huguet,+G">Guillaume Huguet</a>, <a href="https://arxiv.org/search/cs?searchtype=author&amp;query=Wolf,+G">Guy Wolf</a>, <a href="https://arxiv.org/search/cs?searchtype=author&amp;query=Adelstein,+I">Ian Adelstein</a>, <a href="https://arxiv.org/search/cs?searchtype=author&amp;query=Rudner,+T+G+J">Tim G. J. Rudner</a>, <a href="https://arxiv.org/search/cs?searchtype=author&amp;query=Krishnaswamy,+S">Smita Krishnaswamy</a></div> <div class='list-comments mathjax'><span class='descriptor'>Comments:</span> Published in Proceedings of the 28th International Conference on Artificial Intelligence and Statistics (AISTATS 2025) </div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Machine Learning (cs.LG)</span>; Differential Geometry (math.DG); Machine Learning (stat.ML) </div> <p class='mathjax'> Rapid growth of high-dimensional datasets in fields such as single-cell RNA sequencing and spatial genomics has led to unprecedented opportunities for scientific discovery, but it also presents unique computational and statistical challenges. Traditional methods struggle with geometry-aware data generation, interpolation along meaningful trajectories, and transporting populations via feasible paths. To address these issues, we introduce Geometry-Aware Generative Autoencoder (GAGA), a novel framework that combines extensible manifold learning with generative modeling. GAGA constructs a neural network embedding space that respects the intrinsic geometries discovered by manifold learning and learns a novel warped Riemannian metric on the data space. This warped metric is derived from both the points on the data manifold and negative samples off the manifold, allowing it to characterize a meaningful geometry across the entire latent space. Using this metric, GAGA can uniformly sample points on the manifold, generate points along geodesics, and interpolate between populations across the learned manifold using geodesic-guided flows. GAGA shows competitive performance in simulated and real-world datasets, including a 30% improvement over the state-of-the-art methods in single-cell population-level trajectory inference. 
</p> </div> </dd> <dt> <a name='item27'>[27]</a> <a href ="/abs/2410.13012" title="Abstract" id="2410.13012"> arXiv:2410.13012 </a> (replaced) [<a href="/pdf/2410.13012" title="Download PDF" id="pdf-2410.13012" aria-labelledby="pdf-2410.13012">pdf</a>, <a href="https://arxiv.org/html/2410.13012v3" title="View HTML" id="html-2410.13012" aria-labelledby="html-2410.13012" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2410.13012" title="Other formats" id="oth-2410.13012" aria-labelledby="oth-2410.13012">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Sample Compression Scheme Reductions </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&amp;query=Attias,+I">Idan Attias</a>, <a href="https://arxiv.org/search/cs?searchtype=author&amp;query=Hanneke,+S">Steve Hanneke</a>, <a href="https://arxiv.org/search/cs?searchtype=author&amp;query=Ramaswami,+A">Arvind Ramaswami</a></div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Machine Learning (cs.LG)</span>; Machine Learning (stat.ML) </div> <p class='mathjax'> We present novel reductions from sample compression schemes in multiclass classification, regression, and adversarially robust learning settings to binary sample compression schemes. Assuming we have a compression scheme for binary classes of size $f(d_\mathrm{VC})$, where $d_\mathrm{VC}$ is the VC dimension, then we have the following results: (1) If the binary compression scheme is a majority-vote or a stable compression scheme, then there exists a multiclass compression scheme of size $O(f(d_\mathrm{G}))$, where $d_\mathrm{G}$ is the graph dimension. Moreover, for general binary compression schemes, we obtain a compression of size $O(f(d_\mathrm{G})\log|Y|)$, where $Y$ is the label space. (2) If the binary compression scheme is a majority-vote or a stable compression scheme, then there exists an $\epsilon$-approximate compression scheme for regression over $[0,1]$-valued functions of size $O(f(d_\mathrm{P}))$, where $d_\mathrm{P}$ is the pseudo-dimension. For general binary compression schemes, we obtain a compression of size $O(f(d_\mathrm{P})\log(1/\epsilon))$. These results would have significant implications if the sample compression conjecture, which posits that any binary concept class with a finite VC dimension admits a binary compression scheme of size $O(d_\mathrm{VC})$, is resolved (Littlestone and Warmuth, 1986; Floyd and Warmuth, 1995; Warmuth, 2003). Our results would then extend the proof of the conjecture immediately to other settings. We establish similar results for adversarially robust learning and also provide an example of a concept class that is robustly learnable but has no bounded-size compression scheme, demonstrating that learnability is not equivalent to having a compression scheme independent of the sample size, unlike in binary classification, where compression of size $2^{O(d_\mathrm{VC})}$ is attainable (Moran and Yehudayoff, 2016). 
</p> </div> </dd> <dt> <a name='item28'>[28]</a> <a href ="/abs/2410.19426" title="Abstract" id="2410.19426"> arXiv:2410.19426 </a> (replaced) [<a href="/pdf/2410.19426" title="Download PDF" id="pdf-2410.19426" aria-labelledby="pdf-2410.19426">pdf</a>, <a href="https://arxiv.org/html/2410.19426v2" title="View HTML" id="html-2410.19426" aria-labelledby="html-2410.19426" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2410.19426" title="Other formats" id="oth-2410.19426" aria-labelledby="oth-2410.19426">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Analyzing Generative Models by Manifold Entropic Metrics </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&amp;query=Galperin,+D">Daniel Galperin</a>, <a href="https://arxiv.org/search/cs?searchtype=author&amp;query=K%C3%B6the,+U">Ullrich Köthe</a></div> <div class='list-comments mathjax'><span class='descriptor'>Comments:</span> Camera-ready version: accepted at AISTATS 2025 </div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Machine Learning (cs.LG)</span>; Machine Learning (stat.ML) </div> <p class='mathjax'> Good generative models should not only synthesize high quality data, but also utilize interpretable representations that aid human understanding of their behavior. However, it is difficult to measure objectively if and to what degree desirable properties of disentangled representations have been achieved. Inspired by the principle of independent mechanisms, we address this difficulty by introducing a novel set of tractable information-theoretic evaluation metrics. We demonstrate the usefulness of our metrics on illustrative toy examples and conduct an in-depth comparison of various normalizing flow architectures and $\beta$-VAEs on the EMNIST dataset. Our method makes it possible to sort latent features by importance and to assess the amount of residual correlation among the resulting concepts. The most interesting finding of our experiments is a ranking of model architectures and training procedures in terms of their inductive bias to converge to aligned and disentangled representations during training.
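</p> <p class='mathjax'> The paper's metrics are more general; as a crude illustration of measuring residual correlation among latent features, the sketch below computes the total correlation of the codes under a Gaussian assumption (a stand-in technique, not the authors' evaluation metrics). </p> <pre>
import numpy as np

def gaussian_total_correlation(z):
    # Total correlation of latent codes z with shape (N, D), assuming the
    # codes are approximately Gaussian:
    #   TC = sum_i H(z_i) - H(z) = 0.5 * (sum_i log var_i - log det Sigma).
    # It is zero for independent Gaussian features; larger values indicate
    # more residual correlation between the learned concepts.
    cov = np.cov(z, rowvar=False)
    _, logdet = np.linalg.slogdet(cov)
    return 0.5 * (np.sum(np.log(np.diag(cov))) - logdet)

z = np.random.default_rng(1).normal(size=(5000, 8))
print(gaussian_total_correlation(z))   # near zero for independent features
</pre> <p class='mathjax'>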
</p> </div> </dd> <dt> <a name='item29'>[29]</a> <a href ="/abs/2411.02770" title="Abstract" id="2411.02770"> arXiv:2411.02770 </a> (replaced) [<a href="/pdf/2411.02770" title="Download PDF" id="pdf-2411.02770" aria-labelledby="pdf-2411.02770">pdf</a>, <a href="https://arxiv.org/html/2411.02770v3" title="View HTML" id="html-2411.02770" aria-labelledby="html-2411.02770" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2411.02770" title="Other formats" id="oth-2411.02770" aria-labelledby="oth-2411.02770">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> A spectral mixture representation of isotropic kernels to generalize random Fourier features </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&amp;query=Langren%C3%A9,+N">Nicolas Langrené</a>, <a href="https://arxiv.org/search/cs?searchtype=author&amp;query=Warin,+X">Xavier Warin</a>, <a href="https://arxiv.org/search/cs?searchtype=author&amp;query=Gruet,+P">Pierre Gruet</a></div> <div class='list-comments mathjax'><span class='descriptor'>Comments:</span> 19 pages, 16 figures </div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Machine Learning (cs.LG)</span>; Probability (math.PR); Computation (stat.CO); Machine Learning (stat.ML) </div> <p class='mathjax'> Rahimi and Recht (2007) introduced the idea of decomposing positive definite shift-invariant kernels by randomly sampling from their spectral distribution. This famous technique, known as Random Fourier Features (RFF), is in principle applicable to any such kernel whose spectral distribution can be identified and simulated. In practice, however, it is usually applied to the Gaussian kernel because of its simplicity, since its spectral distribution is also Gaussian. Clearly, simple spectral sampling formulas would be desirable for broader classes of kernels. In this paper, we show that the spectral distribution of positive definite isotropic kernels in $\mathbb{R}^{d}$ for all $d\geq1$ can be decomposed as a scale mixture of $\alpha$-stable random vectors, and we identify the mixing distribution as a function of the kernel. This constructive decomposition provides a simple and ready-to-use spectral sampling formula for many multivariate positive definite shift-invariant kernels, including exponential power kernels, generalized Matérn kernels, generalized Cauchy kernels, as well as newly introduced kernels such as the Beta, Kummer, and Tricomi kernels. In particular, we retrieve the fact that the spectral distributions of these kernels are scale mixtures of the multivariate Gaussian distribution, along with an explicit mixing distribution formula. This result has broad applications for support vector machines, kernel ridge regression, Gaussian processes, and other kernel-based machine learning techniques for which the random Fourier features technique is applicable.
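</p> <p class='mathjax'> The random Fourier feature recipe is short in code. The sketch below shows the standard construction for the Gaussian kernel and, as a hedged illustration of the Gaussian scale-mixture idea, the well-known Matérn case, whose spectral density is a multivariate Student-t and can therefore be sampled as a Gaussian draw rescaled by a Gamma variable; the kernels introduced in the paper follow the same pattern with different mixing distributions, which this sketch does not attempt to reproduce. </p> <pre>
import numpy as np

rng = np.random.default_rng(0)

def rff(X, n_features, sample_freqs):
    # phi(x) . phi(y) approximates k(x, y) for a shift-invariant kernel
    W = sample_freqs(n_features, X.shape[1])          # spectral samples
    b = rng.uniform(0.0, 2.0 * np.pi, size=n_features)
    return np.sqrt(2.0 / n_features) * np.cos(X @ W.T + b)

def gaussian_freqs(D, d, lengthscale=1.0):
    # Gaussian kernel exp(-||x - y||^2 / (2 l^2)): spectral law N(0, I / l^2)
    return rng.normal(scale=1.0 / lengthscale, size=(D, d))

def matern_freqs(D, d, nu=1.5, lengthscale=1.0):
    # Matern-nu spectral density is a multivariate Student-t with 2*nu dof,
    # i.e. a Gamma scale mixture of Gaussians: w = z / (l * sqrt(g)) with
    # z ~ N(0, I) and g ~ Gamma(shape=nu, scale=1/nu).
    g = rng.gamma(shape=nu, scale=1.0 / nu, size=(D, 1))
    return rng.normal(size=(D, d)) / (lengthscale * np.sqrt(g))

X = rng.normal(size=(200, 3))
Phi = rff(X, 2000, gaussian_freqs)
K_approx = Phi @ Phi.T     # approximates the Gaussian kernel Gram matrix
</pre> <p class='mathjax'>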
</p> </div> </dd> <dt> <a name='item30'>[30]</a> <a href ="/abs/2412.11692" title="Abstract" id="2412.11692"> arXiv:2412.11692 </a> (replaced) [<a href="/pdf/2412.11692" title="Download PDF" id="pdf-2412.11692" aria-labelledby="pdf-2412.11692">pdf</a>, <a href="https://arxiv.org/html/2412.11692v4" title="View HTML" id="html-2412.11692" aria-labelledby="html-2412.11692" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2412.11692" title="Other formats" id="oth-2412.11692" aria-labelledby="oth-2412.11692">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> A partial likelihood approach to tree-based density modeling and its application in Bayesian inference </div> <div class='list-authors'><a href="https://arxiv.org/search/stat?searchtype=author&amp;query=Ma,+L">Li Ma</a>, <a href="https://arxiv.org/search/stat?searchtype=author&amp;query=Bruni,+B">Benedetta Bruni</a></div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Methodology (stat.ME)</span>; Statistics Theory (math.ST); Computation (stat.CO); Machine Learning (stat.ML) </div> <p class='mathjax'> Tree-based priors for probability distributions are usually specified using a predetermined, data-independent collection of candidate recursive partitions of the sample space. To characterize an unknown target density in detail over the entire sample space, candidate partitions must have the capacity to expand deeply into all areas of the sample space with potential non-zero sampling probability. Such an expansive system of partitions often incurs prohibitive computational costs and makes inference prone to overfitting, especially in regions with little probability mass. Thus, existing models typically make a compromise and rely on relatively shallow trees. This hampers one of the most desirable features of trees, their ability to characterize local features, and results in reduced statistical efficiency. Traditional wisdom suggests that this compromise is inevitable to ensure coherent likelihood-based reasoning in Bayesian inference, as a data-dependent partition system that allows deeper expansion only in regions with more observations would induce double dipping of the data. We propose a simple strategy to restore coherency while allowing the candidate partitions to be data-dependent, using Cox&#39;s partial likelihood. Our partial likelihood approach is broadly applicable to existing likelihood-based methods and, in particular, to Bayesian inference on tree-based models. We give examples in density estimation in which the partial likelihood is endowed with existing priors on tree-based models and compare with the standard, full-likelihood approach. The results show substantial gains in estimation accuracy and computational efficiency from adopting the partial likelihood. 
</p> </div> </dd> <dt> <a name='item31'>[31]</a> <a href ="/abs/2503.12645" title="Abstract" id="2503.12645"> arXiv:2503.12645 </a> (replaced) [<a href="/pdf/2503.12645" title="Download PDF" id="pdf-2503.12645" aria-labelledby="pdf-2503.12645">pdf</a>, <a href="/format/2503.12645" title="Other formats" id="oth-2503.12645" aria-labelledby="oth-2503.12645">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Understanding Gradient Orthogonalization for Deep Learning via Non-Euclidean Trust-Region Optimization </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&amp;query=Kovalev,+D">Dmitry Kovalev</a></div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Machine Learning (cs.LG)</span>; Optimization and Control (math.OC); Machine Learning (stat.ML) </div> <p class='mathjax'> Optimization with matrix gradient orthogonalization has recently demonstrated impressive results in the training of deep neural networks (Jordan et al., 2024; Liu et al., 2025). In this paper, we provide a theoretical analysis of this approach. In particular, we show that the orthogonalized gradient method can be seen as a first-order trust-region optimization method, where the trust-region is defined in terms of the matrix spectral norm. Motivated by this observation, we develop the stochastic non-Euclidean trust-region gradient method with momentum, which recovers the Muon optimizer (Jordan et al., 2024) as a special case, along with normalized SGD and signSGD with momentum (Cutkosky and Mehta, 2020; Sun et al., 2023). In addition, we prove state-of-the-art convergence results for the proposed algorithm in a range of scenarios, which involve arbitrary non-Euclidean norms, constrained and composite problems, and non-convex, star-convex, first- and second-order smooth functions. Finally, our theoretical findings provide an explanation for several practical observations, including the practical superiority of Muon compared to the Orthogonal-SGDM algorithm of Tuddenham et al. (2022) and the importance of weight decay in the training of large-scale language models. </p> </div> </dd> <dt> <a name='item32'>[32]</a> <a href ="/abs/2503.21878" title="Abstract" id="2503.21878"> arXiv:2503.21878 </a> (replaced) [<a href="/pdf/2503.21878" title="Download PDF" id="pdf-2503.21878" aria-labelledby="pdf-2503.21878">pdf</a>, <a href="/format/2503.21878" title="Other formats" id="oth-2503.21878" aria-labelledby="oth-2503.21878">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Is Best-of-N the Best of Them? Coverage, Scaling, and Optimality in Inference-Time Alignment </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&amp;query=Huang,+A">Audrey Huang</a>, <a href="https://arxiv.org/search/cs?searchtype=author&amp;query=Block,+A">Adam Block</a>, <a href="https://arxiv.org/search/cs?searchtype=author&amp;query=Liu,+Q">Qinghua Liu</a>, <a href="https://arxiv.org/search/cs?searchtype=author&amp;query=Jiang,+N">Nan Jiang</a>, <a href="https://arxiv.org/search/cs?searchtype=author&amp;query=Krishnamurthy,+A">Akshay Krishnamurthy</a>, <a href="https://arxiv.org/search/cs?searchtype=author&amp;query=Foster,+D+J">Dylan J. 
Foster</a></div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Artificial Intelligence (cs.AI)</span>; Machine Learning (cs.LG); Machine Learning (stat.ML) </div> <p class='mathjax'> Inference-time computation offers a powerful axis for scaling the performance of language models. However, naively increasing computation in techniques like Best-of-N sampling can lead to performance degradation due to reward hacking. Toward a theoretical understanding of how to best leverage additional computation, we focus on inference-time alignment, which we formalize as the problem of improving the quality of responses drawn from a pre-trained policy, given a prompt of interest and access to an imperfect reward model. We analyze the performance of inference-time alignment algorithms in terms of (i) response quality, and (ii) compute, and provide new results that highlight the importance of the pre-trained policy&#39;s coverage over high-quality responses for performance and compute scaling: <br>1. We show that Best-of-$N$ alignment with an ideal choice for $N$ can achieve optimal performance under stringent notions of coverage, but provably suffers from reward hacking when $N$ is large, and fails to achieve tight guarantees under more realistic coverage conditions. <br>2. We introduce $\texttt{InferenceTimePessimism}$, a new algorithm which mitigates reward hacking through deliberate use of inference-time compute, implementing the principle of pessimism in the face of uncertainty via rejection sampling; we prove that its performance is optimal and does not degrade with $N$, meaning it is scaling-monotonic. <br>We complement our theoretical results with an experimental evaluation that demonstrates the benefits of $\texttt{InferenceTimePessimism}$ across a variety of tasks and models. </p> </div> </dd> <dt> <a name='item33'>[33]</a> <a href ="/abs/2504.02618" title="Abstract" id="2504.02618"> arXiv:2504.02618 </a> (replaced) [<a href="/pdf/2504.02618" title="Download PDF" id="pdf-2504.02618" aria-labelledby="pdf-2504.02618">pdf</a>, <a href="/format/2504.02618" title="Other formats" id="oth-2504.02618" aria-labelledby="oth-2504.02618">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Variational Online Mirror Descent for Robust Learning in Schrödinger Bridge </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&amp;query=Han,+D">Dong-Sig Han</a>, <a href="https://arxiv.org/search/cs?searchtype=author&amp;query=Kim,+J">Jaein Kim</a>, <a href="https://arxiv.org/search/cs?searchtype=author&amp;query=Yoo,+H+B">Hee Bin Yoo</a>, <a href="https://arxiv.org/search/cs?searchtype=author&amp;query=Zhang,+B">Byoung-Tak Zhang</a></div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Machine Learning (cs.LG)</span>; Machine Learning (stat.ML) </div> <p class='mathjax'> The Schrödinger bridge (SB) has evolved into a universal class of probabilistic generative models. In practice, however, estimated learning signals are often uncertain, and the reliability promised by existing methods is often based on speculative optimal-case scenarios. Recent studies interpreting the Sinkhorn algorithm through mirror descent (MD) have gained attention, revealing geometric insights into solution acquisition of the SB problems.
In this paper, we propose a variational online MD (OMD) framework for the SB problems, which provides further stability to SB solvers. We formally prove convergence and a regret bound for the novel OMD formulation of SB acquisition. As a result, we propose a simulation-free SB algorithm called Variational Mirrored Schrödinger Bridge (VMSB) by utilizing the Wasserstein-Fisher-Rao geometry of the Gaussian mixture parameterization for Schrödinger potentials. Based on the Wasserstein gradient flow theory, the algorithm offers tractable learning dynamics that precisely approximate each OMD step. In experiments, we validate the performance of the proposed VMSB algorithm across an extensive suite of benchmarks. VMSB consistently outperforms contemporary SB solvers on a range of SB problems, demonstrating the robustness predicted by our theory. </p> </div> </dd> <dt> <a name='item34'>[34]</a> <a href ="/abs/2504.03152" title="Abstract" id="2504.03152"> arXiv:2504.03152 </a> (replaced) [<a href="/pdf/2504.03152" title="Download PDF" id="pdf-2504.03152" aria-labelledby="pdf-2504.03152">pdf</a>, <a href="https://arxiv.org/html/2504.03152v2" title="View HTML" id="html-2504.03152" aria-labelledby="html-2504.03152" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2504.03152" title="Other formats" id="oth-2504.03152" aria-labelledby="oth-2504.03152">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Safe Screening Rules for Group OWL Models </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&amp;query=Bao,+R">Runxue Bao</a>, <a href="https://arxiv.org/search/cs?searchtype=author&amp;query=Lu,+Q">Quanchao Lu</a>, <a href="https://arxiv.org/search/cs?searchtype=author&amp;query=Zhang,+Y">Yanfu Zhang</a></div> <div class='list-comments mathjax'><span class='descriptor'>Comments:</span> 8 pages </div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Machine Learning (cs.LG)</span>; Machine Learning (stat.ML) </div> <p class='mathjax'> Group Ordered Weighted $L_{1}$-Norm (Group OWL) regularized models have emerged as a useful procedure for high-dimensional sparse multi-task learning with correlated features. Proximal gradient methods are used as standard approaches to solving Group OWL models. However, Group OWL models usually incur high computational costs and memory usage when the feature size is large in the high-dimensional scenario. To address this challenge, in this paper, we propose the first safe screening rule for Group OWL models by effectively tackling the structured non-separable penalty, which can quickly identify the inactive features that have zero coefficients across all the tasks. Thus, by removing the inactive features during the training process, we may achieve substantial computational gain and memory savings. More importantly, the proposed screening rule can be directly integrated with the existing solvers both in the batch and stochastic settings. Theoretically, we prove that our screening rule is safe and can be safely applied to existing iterative optimization algorithms. Our experimental results demonstrate that our screening rule can effectively identify the inactive features and lead to a significant computational speedup without any loss of accuracy.
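</p> <p class='mathjax'> The paper's rule is specific to the non-separable Group OWL penalty. Purely to illustrate the general shape of a screening step, the hypothetical sketch below discards feature groups whose correlation with the current residual falls below a per-group threshold, in the spirit of classical group-lasso screening; it is not the safe rule derived in the paper. </p> <pre>
import numpy as np

def screen_groups(X, residual, groups, thresholds):
    # Hypothetical group-screening step (NOT the Group OWL rule from the
    # paper): group g stays in the working set only if ||X_g^T residual||
    # exceeds its threshold; discarded groups keep zero coefficients.
    active = []
    for g, thr in zip(groups, thresholds):
        score = np.linalg.norm(X[:, g].T @ residual)
        if score &gt; thr:
            active.append(g)
    return active
</pre> <p class='mathjax'>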
</p> </div> </dd> <dt> <a name='item35'>[35]</a> <a href ="/abs/2504.05250" title="Abstract" id="2504.05250"> arXiv:2504.05250 </a> (replaced) [<a href="/pdf/2504.05250" title="Download PDF" id="pdf-2504.05250" aria-labelledby="pdf-2504.05250">pdf</a>, <a href="https://arxiv.org/html/2504.05250v2" title="View HTML" id="html-2504.05250" aria-labelledby="html-2504.05250" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2504.05250" title="Other formats" id="oth-2504.05250" aria-labelledby="oth-2504.05250">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> PEAKS: Selecting Key Training Examples Incrementally via Prediction Error Anchored by Kernel Similarity </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&amp;query=Gurbuz,+M+B">Mustafa Burak Gurbuz</a>, <a href="https://arxiv.org/search/cs?searchtype=author&amp;query=Zheng,+X">Xingyu Zheng</a>, <a href="https://arxiv.org/search/cs?searchtype=author&amp;query=Dovrolis,+C">Constantine Dovrolis</a></div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Machine Learning (cs.LG)</span>; Machine Learning (stat.ML) </div> <p class='mathjax'> As deep learning continues to be driven by ever-larger datasets, understanding which examples are most important for generalization has become a critical question. While progress in data selection continues, emerging applications require studying this problem in dynamic contexts. To bridge this gap, we pose the Incremental Data Selection (IDS) problem, where examples arrive as a continuous stream, and need to be selected without access to the full data source. In this setting, the learner must incrementally build a training dataset of predefined size while simultaneously learning the underlying task. We find that in IDS, the impact of a new sample on the model state depends fundamentally on both its geometric relationship in the feature space and its prediction error. Leveraging this insight, we propose PEAKS (Prediction Error Anchored by Kernel Similarity), an efficient data selection method tailored for IDS. Our comprehensive evaluations demonstrate that PEAKS consistently outperforms existing selection strategies. Furthermore, PEAKS yields increasingly better performance returns than random selection as training data size grows on real-world datasets. 
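</p> <p class='mathjax'> The exact scoring rule is defined in the paper; the hypothetical sketch below only mirrors the stated intuition, favouring incoming examples with high prediction error that are dissimilar, under an RBF kernel, from examples already selected. The multiplicative combination and the bandwidth are illustrative assumptions. </p> <pre>
import numpy as np

def selection_score(pred_error, feat, selected_feats, bandwidth=1.0):
    # Hypothetical combination (the actual PEAKS rule differs): reward high
    # prediction error, damp it when the example is redundant with respect
    # to the already-selected buffer as measured by an RBF kernel.
    if len(selected_feats) == 0:
        return pred_error
    sel = np.asarray(selected_feats)
    d2 = np.sum((sel - feat) ** 2, axis=1)
    redundancy = np.max(np.exp(-d2 / (2.0 * bandwidth ** 2)))
    return pred_error * (1.0 - redundancy)
</pre> <p class='mathjax'>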
</p> </div> </dd> </dl> <div class='paging'>Total of 35 entries </div> </div> </div> </div> </main> </div> </body> </html>