CINXE.COM

Search | arXiv e-print repository

<!DOCTYPE html> <html lang="en"> <head> <meta charset="utf-8"/> <meta name="viewport" content="width=device-width, initial-scale=1"/> <!-- new favicon config and versions by realfavicongenerator.net --> <link rel="apple-touch-icon" sizes="180x180" href="https://static.arxiv.org/static/base/1.0.0a5/images/icons/apple-touch-icon.png"> <link rel="icon" type="image/png" sizes="32x32" href="https://static.arxiv.org/static/base/1.0.0a5/images/icons/favicon-32x32.png"> <link rel="icon" type="image/png" sizes="16x16" href="https://static.arxiv.org/static/base/1.0.0a5/images/icons/favicon-16x16.png"> <link rel="manifest" href="https://static.arxiv.org/static/base/1.0.0a5/images/icons/site.webmanifest"> <link rel="mask-icon" href="https://static.arxiv.org/static/base/1.0.0a5/images/icons/safari-pinned-tab.svg" color="#b31b1b"> <link rel="shortcut icon" href="https://static.arxiv.org/static/base/1.0.0a5/images/icons/favicon.ico"> <meta name="msapplication-TileColor" content="#b31b1b"> <meta name="msapplication-config" content="images/icons/browserconfig.xml"> <meta name="theme-color" content="#b31b1b"> <!-- end favicon config --> <title>Search | arXiv e-print repository</title> <script defer src="https://static.arxiv.org/static/base/1.0.0a5/fontawesome-free-5.11.2-web/js/all.js"></script> <link rel="stylesheet" href="https://static.arxiv.org/static/base/1.0.0a5/css/arxivstyle.css" /> <script type="text/x-mathjax-config"> MathJax.Hub.Config({ messageStyle: "none", extensions: ["tex2jax.js"], jax: ["input/TeX", "output/HTML-CSS"], tex2jax: { inlineMath: [ ['$','$'], ["\\(","\\)"] ], displayMath: [ ['$$','$$'], ["\\[","\\]"] ], processEscapes: true, ignoreClass: '.*', processClass: 'mathjax.*' }, TeX: { extensions: ["AMSmath.js", "AMSsymbols.js", "noErrors.js"], noErrors: { inlineDelimiters: ["$","$"], multiLine: false, style: { "font-size": "normal", "border": "" } } }, "HTML-CSS": { availableFonts: ["TeX"] } }); </script> <script 
src='//static.arxiv.org/MathJax-2.7.3/MathJax.js'></script> <script src="https://static.arxiv.org/static/base/1.0.0a5/js/notification.js"></script> <link rel="stylesheet" href="https://static.arxiv.org/static/search/0.5.6/css/bulma-tooltip.min.css" /> <link rel="stylesheet" href="https://static.arxiv.org/static/search/0.5.6/css/search.css" /> <script src="https://code.jquery.com/jquery-3.2.1.slim.min.js" integrity="sha256-k2WSCIexGzOj3Euiig+TlR8gA0EmPjuc79OEeY5L45g=" crossorigin="anonymous"></script> <script src="https://static.arxiv.org/static/search/0.5.6/js/fieldset.js"></script> <style> radio#cf-customfield_11400 { display: none; } </style> </head> <body> <header><a href="#main-container" class="is-sr-only">Skip to main content</a> <!-- contains Cornell logo and sponsor statement --> <div class="attribution level is-marginless" role="banner"> <div class="level-left"> <a class="level-item" href="https://cornell.edu/"><img src="https://static.arxiv.org/static/base/1.0.0a5/images/cornell-reduced-white-SMALL.svg" alt="Cornell University" width="200" aria-label="logo" /></a> </div> <div class="level-right is-marginless"><p class="sponsors level-item is-marginless"><span id="support-ack-url">We gratefully acknowledge support from<br /> the Simons Foundation, <a href="https://info.arxiv.org/about/ourmembers.html">member institutions</a>, and all contributors. 
<a href="https://info.arxiv.org/about/donate.html">Donate</a></span></p></div> </div> <!-- contains arXiv identity and search bar --> <div class="identity level is-marginless"> <div class="level-left"> <div class="level-item"> <a class="arxiv" href="https://arxiv.org/" aria-label="arxiv-logo"> <img src="https://static.arxiv.org/static/base/1.0.0a5/images/arxiv-logo-one-color-white.svg" aria-label="logo" alt="arxiv logo" width="85" style="width:85px;"/> </a> </div> </div> <div class="search-block level-right"> <form class="level-item mini-search" method="GET" action="https://arxiv.org/search"> <div class="field has-addons"> <div class="control"> <input class="input is-small" type="text" name="query" placeholder="Search..." aria-label="Search term or terms" /> <p class="help"><a href="https://info.arxiv.org/help">Help</a> | <a href="https://arxiv.org/search/advanced">Advanced Search</a></p> </div> <div class="control"> <div class="select is-small"> <select name="searchtype" aria-label="Field to search"> <option value="all" selected="selected">All fields</option> <option value="title">Title</option> <option value="author">Author</option> <option value="abstract">Abstract</option> <option value="comments">Comments</option> <option value="journal_ref">Journal reference</option> <option value="acm_class">ACM classification</option> <option value="msc_class">MSC classification</option> <option value="report_num">Report number</option> <option value="paper_id">arXiv identifier</option> <option value="doi">DOI</option> <option value="orcid">ORCID</option> <option value="author_id">arXiv author ID</option> <option value="help">Help pages</option> <option value="full_text">Full text</option> </select> </div> </div> <input type="hidden" name="source" value="header"> <button class="button is-small is-cul-darker">Search</button> </div> </form> </div> </div> <!-- closes identity --> <div class="container"> <div class="user-tools is-size-7 has-text-right has-text-weight-bold" 
role="navigation" aria-label="User menu"> <a href="https://arxiv.org/login">Login</a> </div> </div> </header> <main class="container" id="main-container"> <div class="level is-marginless"> <div class="level-left"> <h1 class="title is-clearfix"> Showing 1&ndash;29 of 29 results for author: <span class="mathjax">Eickenberg, M</span> </h1> </div> <div class="level-right is-hidden-mobile"> <!-- feedback for mobile is moved to footer --> <span class="help" style="display: inline-block;"><a href="https://github.com/arXiv/arxiv-search/releases">Search v0.5.6 released 2020-02-24</a>&nbsp;&nbsp;</span> </div> </div> <div class="content"> <form method="GET" action="/search/cs" aria-role="search"> Searching in archive <strong>cs</strong>. <a href="/search/?searchtype=author&amp;query=Eickenberg%2C+M">Search in all archives.</a> <div class="field has-addons-tablet"> <div class="control is-expanded"> <label for="query" class="hidden-label">Search term or terms</label> <input class="input is-medium" id="query" name="query" placeholder="Search term..." 
type="text" value="Eickenberg, M"> </div> <div class="select control is-medium"> <label class="is-hidden" for="searchtype">Field</label> <select class="is-medium" id="searchtype" name="searchtype"><option value="all">All fields</option><option value="title">Title</option><option selected value="author">Author(s)</option><option value="abstract">Abstract</option><option value="comments">Comments</option><option value="journal_ref">Journal reference</option><option value="acm_class">ACM classification</option><option value="msc_class">MSC classification</option><option value="report_num">Report number</option><option value="paper_id">arXiv identifier</option><option value="doi">DOI</option><option value="orcid">ORCID</option><option value="license">License (URI)</option><option value="author_id">arXiv author ID</option><option value="help">Help pages</option><option value="full_text">Full text</option></select> </div> <div class="control"> <button class="button is-link is-medium">Search</button> </div> </div> <div class="field"> <div class="control is-size-7"> <label class="radio"> <input checked id="abstracts-0" name="abstracts" type="radio" value="show"> Show abstracts </label> <label class="radio"> <input id="abstracts-1" name="abstracts" type="radio" value="hide"> Hide abstracts </label> </div> </div> <div class="is-clearfix" style="height: 2.5em"> <div class="is-pulled-right"> <a href="/search/advanced?terms-0-term=Eickenberg%2C+M&amp;terms-0-field=author&amp;size=50&amp;order=-announced_date_first">Advanced Search</a> </div> </div> <input type="hidden" name="order" value="-announced_date_first"> <input type="hidden" name="size" value="50"> </form> <div class="level breathe-horizontal"> <div class="level-left"> <form method="GET" action="/search/"> <div style="display: none;"> <select id="searchtype" name="searchtype"><option value="all">All fields</option><option value="title">Title</option><option selected value="author">Author(s)</option><option 
value="abstract">Abstract</option><option value="comments">Comments</option><option value="journal_ref">Journal reference</option><option value="acm_class">ACM classification</option><option value="msc_class">MSC classification</option><option value="report_num">Report number</option><option value="paper_id">arXiv identifier</option><option value="doi">DOI</option><option value="orcid">ORCID</option><option value="license">License (URI)</option><option value="author_id">arXiv author ID</option><option value="help">Help pages</option><option value="full_text">Full text</option></select> <input id="query" name="query" type="text" value="Eickenberg, M"> <ul id="abstracts"><li><input checked id="abstracts-0" name="abstracts" type="radio" value="show"> <label for="abstracts-0">Show abstracts</label></li><li><input id="abstracts-1" name="abstracts" type="radio" value="hide"> <label for="abstracts-1">Hide abstracts</label></li></ul> </div> <div class="box field is-grouped is-grouped-multiline level-item"> <div class="control"> <span class="select is-small"> <select id="size" name="size"><option value="25">25</option><option selected value="50">50</option><option value="100">100</option><option value="200">200</option></select> </span> <label for="size">results per page</label>. 
</div> <div class="control"> <label for="order">Sort results by</label> <span class="select is-small"> <select id="order" name="order"><option selected value="-announced_date_first">Announcement date (newest first)</option><option value="announced_date_first">Announcement date (oldest first)</option><option value="-submitted_date">Submission date (newest first)</option><option value="submitted_date">Submission date (oldest first)</option><option value="">Relevance</option></select> </span> </div> <div class="control"> <button class="button is-small is-link">Go</button> </div> </div> </form> </div> </div> <ol class="breathe-horizontal" start="1"> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2406.02585">arXiv:2406.02585</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2406.02585">pdf</a>, <a href="https://arxiv.org/format/2406.02585">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">stat.ML</span> </div> </div> <p class="title is-5 mathjax"> Contextual Counting: A Mechanistic Study of Transformers on a Quantitative Task </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Golkar%2C+S">Siavash Golkar</a>, <a href="/search/cs?searchtype=author&amp;query=Bietti%2C+A">Alberto Bietti</a>, <a href="/search/cs?searchtype=author&amp;query=Pettee%2C+M">Mariel Pettee</a>, <a href="/search/cs?searchtype=author&amp;query=Eickenberg%2C+M">Michael Eickenberg</a>, <a href="/search/cs?searchtype=author&amp;query=Cranmer%2C+M">Miles Cranmer</a>, <a href="/search/cs?searchtype=author&amp;query=Hirashima%2C+K">Keiya Hirashima</a>, <a 
href="/search/cs?searchtype=author&amp;query=Krawezik%2C+G">Geraud Krawezik</a>, <a href="/search/cs?searchtype=author&amp;query=Lourie%2C+N">Nicholas Lourie</a>, <a href="/search/cs?searchtype=author&amp;query=McCabe%2C+M">Michael McCabe</a>, <a href="/search/cs?searchtype=author&amp;query=Morel%2C+R">Rudy Morel</a>, <a href="/search/cs?searchtype=author&amp;query=Ohana%2C+R">Ruben Ohana</a>, <a href="/search/cs?searchtype=author&amp;query=Parker%2C+L+H">Liam Holden Parker</a>, <a href="/search/cs?searchtype=author&amp;query=Blancard%2C+B+R">Bruno Régaldo-Saint Blancard</a>, <a href="/search/cs?searchtype=author&amp;query=Cho%2C+K">Kyunghyun Cho</a>, <a href="/search/cs?searchtype=author&amp;query=Ho%2C+S">Shirley Ho</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2406.02585v1-abstract-short" style="display: inline;"> Transformers have revolutionized machine learning across diverse domains, yet understanding their behavior remains crucial, particularly in high-stakes applications. This paper introduces the contextual counting task, a novel toy problem aimed at enhancing our understanding of Transformers in quantitative and scientific contexts. This task requires precise localization and computation within datas&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2406.02585v1-abstract-full').style.display = 'inline'; document.getElementById('2406.02585v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2406.02585v1-abstract-full" style="display: none;"> Transformers have revolutionized machine learning across diverse domains, yet understanding their behavior remains crucial, particularly in high-stakes applications. 
This paper introduces the contextual counting task, a novel toy problem aimed at enhancing our understanding of Transformers in quantitative and scientific contexts. This task requires precise localization and computation within datasets, akin to object detection or region-based scientific analysis. We present theoretical and empirical analysis using both causal and non-causal Transformer architectures, investigating the influence of various positional encodings on performance and interpretability. In particular, we find that causal attention is much better suited for the task, and that no positional embeddings lead to the best accuracy, though rotary embeddings are competitive and easier to train. We also show that out of distribution performance is tightly linked to which tokens it uses as a bias term. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2406.02585v1-abstract-full').style.display = 'none'; document.getElementById('2406.02585v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 30 May, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> June 2024. 
</p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2406.02052">arXiv:2406.02052</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2406.02052">pdf</a>, <a href="https://arxiv.org/format/2406.02052">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">stat.ML</span> </div> </div> <p class="title is-5 mathjax"> PETRA: Parallel End-to-end Training with Reversible Architectures </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Rivaud%2C+S">Stéphane Rivaud</a>, <a href="/search/cs?searchtype=author&amp;query=Fournier%2C+L">Louis Fournier</a>, <a href="/search/cs?searchtype=author&amp;query=Pumir%2C+T">Thomas Pumir</a>, <a href="/search/cs?searchtype=author&amp;query=Belilovsky%2C+E">Eugene Belilovsky</a>, <a href="/search/cs?searchtype=author&amp;query=Eickenberg%2C+M">Michael Eickenberg</a>, <a href="/search/cs?searchtype=author&amp;query=Oyallon%2C+E">Edouard Oyallon</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2406.02052v1-abstract-short" style="display: inline;"> Reversible architectures have been shown to be capable of performing on par with their non-reversible architectures, being applied in deep learning for memory savings and generative modeling. In this work, we show how reversible architectures can solve challenges in parallelizing deep model training. 
We introduce PETRA, a novel alternative to backpropagation for parallelizing gradient computations&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2406.02052v1-abstract-full').style.display = 'inline'; document.getElementById('2406.02052v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2406.02052v1-abstract-full" style="display: none;"> Reversible architectures have been shown to be capable of performing on par with their non-reversible architectures, being applied in deep learning for memory savings and generative modeling. In this work, we show how reversible architectures can solve challenges in parallelizing deep model training. We introduce PETRA, a novel alternative to backpropagation for parallelizing gradient computations. PETRA facilitates effective model parallelism by enabling stages (i.e., a set of layers) to compute independently on different devices, while only needing to communicate activations and gradients between each other. By decoupling the forward and backward passes and keeping a single updated version of the parameters, the need for weight stashing is also removed. We develop a custom autograd-like training framework for PETRA, and we demonstrate its effectiveness on CIFAR-10, ImageNet32, and ImageNet, achieving competitive accuracies comparable to backpropagation using ResNet-18, ResNet-34, and ResNet-50 models. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2406.02052v1-abstract-full').style.display = 'none'; document.getElementById('2406.02052v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 4 June, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> June 2024. 
</p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2406.01365">arXiv:2406.01365</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2406.01365">pdf</a>, <a href="https://arxiv.org/format/2406.01365">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Cryptography and Security">cs.CR</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> From Feature Visualization to Visual Circuits: Effect of Adversarial Model Manipulation </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Nanfack%2C+G">Geraldin Nanfack</a>, <a href="/search/cs?searchtype=author&amp;query=Eickenberg%2C+M">Michael Eickenberg</a>, <a href="/search/cs?searchtype=author&amp;query=Belilovsky%2C+E">Eugene Belilovsky</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2406.01365v1-abstract-short" style="display: inline;"> Understanding the inner working functionality of large-scale deep neural networks is challenging yet crucial in several high-stakes applications. Mechanistic interpretability is an emergent field that tackles this challenge, often by identifying human-understandable subgraphs in deep neural networks known as circuits. 
In vision-pretrained models, these subgraphs are usually interpreted by visual&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2406.01365v1-abstract-full').style.display = 'inline'; document.getElementById('2406.01365v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2406.01365v1-abstract-full" style="display: none;"> Understanding the inner working functionality of large-scale deep neural networks is challenging yet crucial in several high-stakes applications. Mechanistic interpretability is an emergent field that tackles this challenge, often by identifying human-understandable subgraphs in deep neural networks known as circuits. In vision-pretrained models, these subgraphs are usually interpreted by visualizing their node features through a popular technique called feature visualization. Recent works have analyzed the stability of different feature visualization types under the adversarial model manipulation framework. This paper starts by addressing limitations in existing works by proposing a novel attack called ProxPulse that simultaneously manipulates the two types of feature visualizations. Surprisingly, when analyzing these attacks under the umbrella of visual circuits, we find that visual circuits show some robustness to ProxPulse. We, therefore, introduce a new attack based on ProxPulse that unveils the manipulability of visual circuits, shedding light on their lack of robustness. The effectiveness of these attacks is validated using pre-trained AlexNet and ResNet-50 models on ImageNet. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2406.01365v1-abstract-full').style.display = 'none'; document.getElementById('2406.01365v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 3 June, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> June 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Under review</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2402.04958">arXiv:2402.04958</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2402.04958">pdf</a>, <a href="https://arxiv.org/format/2402.04958">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> Channel-Selective Normalization for Label-Shift Robust Test-Time Adaptation </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Vianna%2C+P">Pedro Vianna</a>, <a href="/search/cs?searchtype=author&amp;query=Chaudhary%2C+M">Muawiz Chaudhary</a>, <a href="/search/cs?searchtype=author&amp;query=Mehrbod%2C+P">Paria Mehrbod</a>, <a href="/search/cs?searchtype=author&amp;query=Tang%2C+A">An Tang</a>, <a href="/search/cs?searchtype=author&amp;query=Cloutier%2C+G">Guy Cloutier</a>, <a href="/search/cs?searchtype=author&amp;query=Wolf%2C+G">Guy Wolf</a>, <a href="/search/cs?searchtype=author&amp;query=Eickenberg%2C+M">Michael Eickenberg</a>, <a href="/search/cs?searchtype=author&amp;query=Belilovsky%2C+E">Eugene Belilovsky</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis 
has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2402.04958v2-abstract-short" style="display: inline;"> Deep neural networks have useful applications in many different tasks, however their performance can be severely affected by changes in the data distribution. For example, in the biomedical field, their performance can be affected by changes in the data (different machines, populations) between training and test datasets. To ensure robustness and generalization to real-world scenarios, test-time a&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2402.04958v2-abstract-full').style.display = 'inline'; document.getElementById('2402.04958v2-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2402.04958v2-abstract-full" style="display: none;"> Deep neural networks have useful applications in many different tasks, however their performance can be severely affected by changes in the data distribution. For example, in the biomedical field, their performance can be affected by changes in the data (different machines, populations) between training and test datasets. To ensure robustness and generalization to real-world scenarios, test-time adaptation has been recently studied as an approach to adjust models to a new data distribution during inference. Test-time batch normalization is a simple and popular method that achieved compelling performance on domain shift benchmarks. It is implemented by recalculating batch normalization statistics on test batches. Prior work has focused on analysis with test data that has the same label distribution as the training data. However, in many practical applications this technique is vulnerable to label distribution shifts, sometimes producing catastrophic failure. This presents a risk in applying test time adaptation methods in deployment. 
We propose to tackle this challenge by only selectively adapting channels in a deep network, minimizing drastic adaptation that is sensitive to label shifts. Our selection scheme is based on two principles that we empirically motivate: (1) later layers of networks are more sensitive to label shift (2) individual features can be sensitive to specific classes. We apply the proposed technique to three classification tasks, including CIFAR10-C, Imagenet-C, and diagnosis of fatty liver, where we explore both covariate and label distribution shifts. We find that our method allows to bring the benefits of TTA while significantly reducing the risk of failure common in other methods, while being robust to choice in hyperparameters. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2402.04958v2-abstract-full').style.display = 'none'; document.getElementById('2402.04958v2-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 29 May, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 7 February, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2024. 
</p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Accepted at the Conference on Lifelong Learning Agents (CoLLAs) 2024</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2310.15256">arXiv:2310.15256</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2310.15256">pdf</a>, <a href="https://arxiv.org/format/2310.15256">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Cosmology and Nongalactic Astrophysics">astro-ph.CO</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> SimBIG: Field-level Simulation-Based Inference of Galaxy Clustering </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Lemos%2C+P">Pablo Lemos</a>, <a href="/search/cs?searchtype=author&amp;query=Parker%2C+L">Liam Parker</a>, <a href="/search/cs?searchtype=author&amp;query=Hahn%2C+C">ChangHoon Hahn</a>, <a href="/search/cs?searchtype=author&amp;query=Ho%2C+S">Shirley Ho</a>, <a href="/search/cs?searchtype=author&amp;query=Eickenberg%2C+M">Michael Eickenberg</a>, <a href="/search/cs?searchtype=author&amp;query=Hou%2C+J">Jiamin Hou</a>, <a href="/search/cs?searchtype=author&amp;query=Massara%2C+E">Elena Massara</a>, <a href="/search/cs?searchtype=author&amp;query=Modi%2C+C">Chirag Modi</a>, <a href="/search/cs?searchtype=author&amp;query=Dizgah%2C+A+M">Azadeh Moradinezhad Dizgah</a>, <a href="/search/cs?searchtype=author&amp;query=Blancard%2C+B+R">Bruno Regaldo-Saint Blancard</a>, <a href="/search/cs?searchtype=author&amp;query=Spergel%2C+D">David Spergel</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span 
class="abstract-short has-text-grey-dark mathjax" id="2310.15256v1-abstract-short" style="display: inline;"> We present the first simulation-based inference (SBI) of cosmological parameters from field-level analysis of galaxy clustering. Standard galaxy clustering analyses rely on analyzing summary statistics, such as the power spectrum, $P_\ell$, with analytic models based on perturbation theory. Consequently, they do not fully exploit the non-linear and non-Gaussian features of the galaxy distribution.&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2310.15256v1-abstract-full').style.display = 'inline'; document.getElementById('2310.15256v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2310.15256v1-abstract-full" style="display: none;"> We present the first simulation-based inference (SBI) of cosmological parameters from field-level analysis of galaxy clustering. Standard galaxy clustering analyses rely on analyzing summary statistics, such as the power spectrum, $P_\ell$, with analytic models based on perturbation theory. Consequently, they do not fully exploit the non-linear and non-Gaussian features of the galaxy distribution. To address these limitations, we use the {\sc SimBIG} forward modelling framework to perform SBI using normalizing flows. We apply SimBIG to a subset of the BOSS CMASS galaxy sample using a convolutional neural network with stochastic weight averaging to perform massive data compression of the galaxy field. We infer constraints on $\Omega_m = 0.267^{+0.033}_{-0.029}$ and $\sigma_8=0.762^{+0.036}_{-0.035}$. While our constraints on $\Omega_m$ are in-line with standard $P_\ell$ analyses, those on $\sigma_8$ are $2.65\times$ tighter. Our analysis also provides constraints on the Hubble constant $H_0=64.5 \pm 3.8 \ {\rm km / s / Mpc}$ from galaxy clustering alone. 
This higher constraining power comes from additional non-Gaussian cosmological information, inaccessible with $P_\ell$. We demonstrate the robustness of our analysis by showcasing our ability to infer unbiased cosmological constraints from a series of test simulations that are constructed using different forward models than the one used in our training dataset. This work not only presents competitive cosmological constraints but also introduces novel methods for leveraging additional cosmological information in upcoming galaxy surveys like DESI, PFS, and Euclid. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2310.15256v1-abstract-full').style.display = 'none'; document.getElementById('2310.15256v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 23 October, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2023. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">14 pages, 4 figures. 
A previous version of the paper was published in the ICML 2023 Workshop on Machine Learning for Astrophysics</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2310.03024">arXiv:2310.03024</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2310.03024">pdf</a>, <a href="https://arxiv.org/format/2310.03024">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Instrumentation and Methods for Astrophysics">astro-ph.IM</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> <div class="is-inline-block" style="margin-left: 0.5rem"> <div class="tags has-addons"> <span class="tag is-dark is-size-7">doi</span> <span class="tag is-light is-size-7"><a class="" href="https://doi.org/10.1093/mnras/stae1450">10.1093/mnras/stae1450 <i class="fa fa-external-link" aria-hidden="true"></i></a></span> </div> </div> </div> <p class="title is-5 mathjax"> AstroCLIP: A Cross-Modal Foundation Model for Galaxies </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Parker%2C+L">Liam Parker</a>, <a href="/search/cs?searchtype=author&amp;query=Lanusse%2C+F">Francois Lanusse</a>, <a href="/search/cs?searchtype=author&amp;query=Golkar%2C+S">Siavash Golkar</a>, <a href="/search/cs?searchtype=author&amp;query=Sarra%2C+L">Leopoldo Sarra</a>, <a href="/search/cs?searchtype=author&amp;query=Cranmer%2C+M">Miles Cranmer</a>, <a href="/search/cs?searchtype=author&amp;query=Bietti%2C+A">Alberto Bietti</a>, <a href="/search/cs?searchtype=author&amp;query=Eickenberg%2C+M">Michael Eickenberg</a>, <a href="/search/cs?searchtype=author&amp;query=Krawezik%2C+G">Geraud Krawezik</a>, <a 
href="/search/cs?searchtype=author&amp;query=McCabe%2C+M">Michael McCabe</a>, <a href="/search/cs?searchtype=author&amp;query=Ohana%2C+R">Ruben Ohana</a>, <a href="/search/cs?searchtype=author&amp;query=Pettee%2C+M">Mariel Pettee</a>, <a href="/search/cs?searchtype=author&amp;query=Blancard%2C+B+R">Bruno Regaldo-Saint Blancard</a>, <a href="/search/cs?searchtype=author&amp;query=Tesileanu%2C+T">Tiberiu Tesileanu</a>, <a href="/search/cs?searchtype=author&amp;query=Cho%2C+K">Kyunghyun Cho</a>, <a href="/search/cs?searchtype=author&amp;query=Ho%2C+S">Shirley Ho</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2310.03024v2-abstract-short" style="display: inline;"> We present AstroCLIP, a single, versatile model that can embed both galaxy images and spectra into a shared, physically meaningful latent space. These embeddings can then be used - without any model fine-tuning - for a variety of downstream tasks including (1) accurate in-modality and cross-modality semantic similarity search, (2) photometric redshift estimation, (3) galaxy property estimation fro&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2310.03024v2-abstract-full').style.display = 'inline'; document.getElementById('2310.03024v2-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2310.03024v2-abstract-full" style="display: none;"> We present AstroCLIP, a single, versatile model that can embed both galaxy images and spectra into a shared, physically meaningful latent space. 
These embeddings can then be used - without any model fine-tuning - for a variety of downstream tasks including (1) accurate in-modality and cross-modality semantic similarity search, (2) photometric redshift estimation, (3) galaxy property estimation from both images and spectra, and (4) morphology classification. Our approach to implementing AstroCLIP consists of two parts. First, we embed galaxy images and spectra separately by pretraining separate transformer-based image and spectrum encoders in self-supervised settings. We then align the encoders using a contrastive loss. We apply our method to spectra from the Dark Energy Spectroscopic Instrument and images from its corresponding Legacy Imaging Survey. Overall, we find remarkable performance on all downstream tasks, even relative to supervised baselines. For example, for a task like photometric redshift prediction, we find similar performance to a specifically-trained ResNet18, and for additional tasks like physical property estimation (stellar mass, age, metallicity, and sSFR), we beat this supervised baseline by 19\% in terms of $R^2$. We also compare our results to a state-of-the-art self-supervised single-modal model for galaxy images, and find that our approach outperforms this benchmark by roughly a factor of two on photometric redshift estimation and physical property prediction in terms of $R^2$, while remaining roughly in-line in terms of morphology classification. Ultimately, our approach represents the first cross-modal self-supervised model for galaxies, and the first self-supervised transformer-based architectures for galaxy images and spectra. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2310.03024v2-abstract-full').style.display = 'none'; document.getElementById('2310.03024v2-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 14 June, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 4 October, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2023. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">18 pages, accepted in Monthly Notices of the Royal Astronomical Society, Presented at the NeurIPS 2023 AI4Science Workshop</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2310.02994">arXiv:2310.02994</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2310.02994">pdf</a>, <a href="https://arxiv.org/format/2310.02994">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">stat.ML</span> </div> </div> <p class="title is-5 mathjax"> Multiple Physics Pretraining for Physical Surrogate Models </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=McCabe%2C+M">Michael McCabe</a>, <a href="/search/cs?searchtype=author&amp;query=Blancard%2C+B+R">Bruno Régaldo-Saint Blancard</a>, <a href="/search/cs?searchtype=author&amp;query=Parker%2C+L+H">Liam Holden Parker</a>, <a href="/search/cs?searchtype=author&amp;query=Ohana%2C+R">Ruben 
Ohana</a>, <a href="/search/cs?searchtype=author&amp;query=Cranmer%2C+M">Miles Cranmer</a>, <a href="/search/cs?searchtype=author&amp;query=Bietti%2C+A">Alberto Bietti</a>, <a href="/search/cs?searchtype=author&amp;query=Eickenberg%2C+M">Michael Eickenberg</a>, <a href="/search/cs?searchtype=author&amp;query=Golkar%2C+S">Siavash Golkar</a>, <a href="/search/cs?searchtype=author&amp;query=Krawezik%2C+G">Geraud Krawezik</a>, <a href="/search/cs?searchtype=author&amp;query=Lanusse%2C+F">Francois Lanusse</a>, <a href="/search/cs?searchtype=author&amp;query=Pettee%2C+M">Mariel Pettee</a>, <a href="/search/cs?searchtype=author&amp;query=Tesileanu%2C+T">Tiberiu Tesileanu</a>, <a href="/search/cs?searchtype=author&amp;query=Cho%2C+K">Kyunghyun Cho</a>, <a href="/search/cs?searchtype=author&amp;query=Ho%2C+S">Shirley Ho</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2310.02994v2-abstract-short" style="display: inline;"> We introduce multiple physics pretraining (MPP), an autoregressive task-agnostic pretraining approach for physical surrogate modeling of spatiotemporal systems with transformers. In MPP, rather than training one model on a specific physical system, we train a backbone model to predict the dynamics of multiple heterogeneous physical systems simultaneously in order to learn features that are broadly&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2310.02994v2-abstract-full').style.display = 'inline'; document.getElementById('2310.02994v2-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2310.02994v2-abstract-full" style="display: none;"> We introduce multiple physics pretraining (MPP), an autoregressive task-agnostic pretraining approach for physical surrogate modeling of spatiotemporal systems with transformers. 
In MPP, rather than training one model on a specific physical system, we train a backbone model to predict the dynamics of multiple heterogeneous physical systems simultaneously in order to learn features that are broadly useful across systems and facilitate transfer. In order to learn effectively in this setting, we introduce a shared embedding and normalization strategy that projects the fields of multiple systems into a shared embedding space. We validate the efficacy of our approach on both pretraining and downstream tasks over a broad fluid mechanics-oriented benchmark. We show that a single MPP-pretrained transformer is able to match or outperform task-specific baselines on all pretraining sub-tasks without the need for finetuning. For downstream tasks, we demonstrate that finetuning MPP-trained models results in more accurate predictions across multiple time-steps on systems with previously unseen physical components or higher dimensional systems compared to training from scratch or finetuning pretrained video foundation models. We open-source our code and model weights trained at multiple scales for reproducibility. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2310.02994v2-abstract-full').style.display = 'none'; document.getElementById('2310.02994v2-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 10 December, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 4 October, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2023. 
</p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2310.02989">arXiv:2310.02989</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2310.02989">pdf</a>, <a href="https://arxiv.org/format/2310.02989">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">stat.ML</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> xVal: A Continuous Numerical Tokenization for Scientific Language Models </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Golkar%2C+S">Siavash Golkar</a>, <a href="/search/cs?searchtype=author&amp;query=Pettee%2C+M">Mariel Pettee</a>, <a href="/search/cs?searchtype=author&amp;query=Eickenberg%2C+M">Michael Eickenberg</a>, <a href="/search/cs?searchtype=author&amp;query=Bietti%2C+A">Alberto Bietti</a>, <a href="/search/cs?searchtype=author&amp;query=Cranmer%2C+M">Miles Cranmer</a>, <a href="/search/cs?searchtype=author&amp;query=Krawezik%2C+G">Geraud Krawezik</a>, <a href="/search/cs?searchtype=author&amp;query=Lanusse%2C+F">Francois Lanusse</a>, <a href="/search/cs?searchtype=author&amp;query=McCabe%2C+M">Michael McCabe</a>, <a href="/search/cs?searchtype=author&amp;query=Ohana%2C+R">Ruben Ohana</a>, <a href="/search/cs?searchtype=author&amp;query=Parker%2C+L">Liam Parker</a>, <a href="/search/cs?searchtype=author&amp;query=Blancard%2C+B+R">Bruno Régaldo-Saint Blancard</a>, <a href="/search/cs?searchtype=author&amp;query=Tesileanu%2C+T">Tiberiu Tesileanu</a>, <a 
href="/search/cs?searchtype=author&amp;query=Cho%2C+K">Kyunghyun Cho</a>, <a href="/search/cs?searchtype=author&amp;query=Ho%2C+S">Shirley Ho</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2310.02989v2-abstract-short" style="display: inline;"> Due in part to their discontinuous and discrete default encodings for numbers, Large Language Models (LLMs) have not yet been commonly used to process numerically-dense scientific datasets. Rendering datasets as text, however, could help aggregate diverse and multi-modal scientific data into a single training corpus, thereby potentially facilitating the development of foundation models for science&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2310.02989v2-abstract-full').style.display = 'inline'; document.getElementById('2310.02989v2-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2310.02989v2-abstract-full" style="display: none;"> Due in part to their discontinuous and discrete default encodings for numbers, Large Language Models (LLMs) have not yet been commonly used to process numerically-dense scientific datasets. Rendering datasets as text, however, could help aggregate diverse and multi-modal scientific data into a single training corpus, thereby potentially facilitating the development of foundation models for science. In this work, we introduce xVal, a strategy for continuously tokenizing numbers within language models that results in a more appropriate inductive bias for scientific applications. 
By training specially-modified language models from scratch on a variety of scientific datasets formatted as text, we find that xVal generally outperforms other common numerical tokenization strategies on metrics including out-of-distribution generalization and computational efficiency. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2310.02989v2-abstract-full').style.display = 'none'; document.getElementById('2310.02989v2-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 15 December, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 4 October, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2023. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">15 pages, 12 figures. Appendix: 8 pages, 2 figures. 
Accepted contribution at the NeurIPS Workshop on ML for the Physical Sciences</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2307.14362">arXiv:2307.14362</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2307.14362">pdf</a>, <a href="https://arxiv.org/format/2307.14362">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Instrumentation and Methods for Astrophysics">astro-ph.IM</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Cosmology and Nongalactic Astrophysics">astro-ph.CO</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> Learnable wavelet neural networks for cosmological inference </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Pedersen%2C+C">Christian Pedersen</a>, <a href="/search/cs?searchtype=author&amp;query=Eickenberg%2C+M">Michael Eickenberg</a>, <a href="/search/cs?searchtype=author&amp;query=Ho%2C+S">Shirley Ho</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2307.14362v1-abstract-short" style="display: inline;"> Convolutional neural networks (CNNs) have been shown to both extract more information than the traditional two-point statistics from cosmological fields, and marginalise over astrophysical effects extremely well. However, CNNs require large amounts of training data, which is potentially problematic in the domain of expensive cosmological simulations, and it is difficult to interpret the network. 
I&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2307.14362v1-abstract-full').style.display = 'inline'; document.getElementById('2307.14362v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2307.14362v1-abstract-full" style="display: none;"> Convolutional neural networks (CNNs) have been shown to both extract more information than the traditional two-point statistics from cosmological fields, and marginalise over astrophysical effects extremely well. However, CNNs require large amounts of training data, which is potentially problematic in the domain of expensive cosmological simulations, and it is difficult to interpret the network. In this work we apply the learnable scattering transform, a kind of convolutional neural network that uses trainable wavelets as filters, to the problem of cosmological inference and marginalisation over astrophysical effects. We present two models based on the scattering transform, one constructed for performance, and one constructed for interpretability, and perform a comparison with a CNN. We find that scattering architectures are able to outperform a CNN, significantly in the case of small training data samples. Additionally we present a lightweight scattering network that is highly interpretable. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2307.14362v1-abstract-full').style.display = 'none'; document.getElementById('2307.14362v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 24 July, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> July 2023. 
</p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Accepted at ICML 2022 Workshop on Machine Learning for Astrophysics, Baltimore, Maryland, USA, 2022</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2306.15012">arXiv:2306.15012</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2306.15012">pdf</a>, <a href="https://arxiv.org/format/2306.15012">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">stat.ML</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Instrumentation and Methods for Astrophysics">astro-ph.IM</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Signal Processing">eess.SP</span> </div> </div> <p class="title is-5 mathjax"> Statistical Component Separation for Targeted Signal Recovery in Noisy Mixtures </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Blancard%2C+B+R">Bruno Régaldo-Saint Blancard</a>, <a href="/search/cs?searchtype=author&amp;query=Eickenberg%2C+M">Michael Eickenberg</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2306.15012v3-abstract-short" style="display: inline;"> Separating signals from an additive mixture may be an unnecessarily hard problem when one is only interested in specific properties of a given signal. In this work, we tackle simpler &#34;statistical component separation&#34; problems that focus on recovering a predefined set of statistical descriptors of a target signal from a noisy mixture. 
Assuming access to samples of the noise process, we investigate&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2306.15012v3-abstract-full').style.display = 'inline'; document.getElementById('2306.15012v3-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2306.15012v3-abstract-full" style="display: none;"> Separating signals from an additive mixture may be an unnecessarily hard problem when one is only interested in specific properties of a given signal. In this work, we tackle simpler &#34;statistical component separation&#34; problems that focus on recovering a predefined set of statistical descriptors of a target signal from a noisy mixture. Assuming access to samples of the noise process, we investigate a method devised to match the statistics of the solution candidate corrupted by noise samples with those of the observed mixture. We first analyze the behavior of this method using simple examples with analytically tractable calculations. Then, we apply it in an image denoising context employing 1) wavelet-based descriptors, 2) ConvNet-based descriptors on astrophysics and ImageNet data. In the case of 1), we show that our method better recovers the descriptors of the target data than a standard denoising method in most situations. Additionally, despite not constructed for this purpose, it performs surprisingly well in terms of peak signal-to-noise ratio on full signal reconstruction. In comparison, representation 2) appears less suitable for image denoising. Finally, we extend this method by introducing a diffusive stepwise algorithm which gives a new perspective to the initial method and leads to promising results for image denoising under specific circumstances. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2306.15012v3-abstract-full').style.display = 'none'; document.getElementById('2306.15012v3-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 28 February, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 26 June, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> June 2023. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">13+17 pages, 6+8 figures, published in TMLR, code: https://github.com/bregaldo/stat_comp_sep</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2306.07397">arXiv:2306.07397</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2306.07397">pdf</a>, <a href="https://arxiv.org/format/2306.07397">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> Adversarial Attacks on the Interpretation of Neuron Activation Maximization </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Nanfack%2C+G">Geraldin Nanfack</a>, <a href="/search/cs?searchtype=author&amp;query=Fulleringer%2C+A">Alexander Fulleringer</a>, <a href="/search/cs?searchtype=author&amp;query=Marty%2C+J">Jonathan Marty</a>, <a href="/search/cs?searchtype=author&amp;query=Eickenberg%2C+M">Michael Eickenberg</a>, <a href="/search/cs?searchtype=author&amp;query=Belilovsky%2C+E">Eugene Belilovsky</a> 
</p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2306.07397v1-abstract-short" style="display: inline;"> The internal functional behavior of trained Deep Neural Networks is notoriously difficult to interpret. Activation-maximization approaches are one set of techniques used to interpret and analyze trained deep-learning models. These consist in finding inputs that maximally activate a given neuron or feature map. These inputs can be selected from a data set or obtained by optimization. However, inter&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2306.07397v1-abstract-full').style.display = 'inline'; document.getElementById('2306.07397v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2306.07397v1-abstract-full" style="display: none;"> The internal functional behavior of trained Deep Neural Networks is notoriously difficult to interpret. Activation-maximization approaches are one set of techniques used to interpret and analyze trained deep-learning models. These consist in finding inputs that maximally activate a given neuron or feature map. These inputs can be selected from a data set or obtained by optimization. However, interpretability methods may be subject to being deceived. In this work, we consider the concept of an adversary manipulating a model for the purpose of deceiving the interpretation. We propose an optimization framework for performing this manipulation and demonstrate a number of ways that popular activation-maximization interpretation techniques associated with CNNs can be manipulated to change the interpretations, shedding light on the reliability of these methods. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2306.07397v1-abstract-full').style.display = 'none'; document.getElementById('2306.07397v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 12 June, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> June 2023. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2306.06968">arXiv:2306.06968</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2306.06968">pdf</a>, <a href="https://arxiv.org/format/2306.06968">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Neural and Evolutionary Computing">cs.NE</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">stat.ML</span> </div> </div> <p class="title is-5 mathjax"> Can Forward Gradient Match Backpropagation? 
</p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Fournier%2C+L">Louis Fournier</a>, <a href="/search/cs?searchtype=author&amp;query=Rivaud%2C+S">Stéphane Rivaud</a>, <a href="/search/cs?searchtype=author&amp;query=Belilovsky%2C+E">Eugene Belilovsky</a>, <a href="/search/cs?searchtype=author&amp;query=Eickenberg%2C+M">Michael Eickenberg</a>, <a href="/search/cs?searchtype=author&amp;query=Oyallon%2C+E">Edouard Oyallon</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2306.06968v1-abstract-short" style="display: inline;"> Forward Gradients - the idea of using directional derivatives in forward differentiation mode - have recently been shown to be utilizable for neural network training while avoiding problems generally associated with backpropagation gradient computation, such as locking and memorization requirements. The cost is the requirement to guess the step direction, which is hard in high dimensions. While c&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2306.06968v1-abstract-full').style.display = 'inline'; document.getElementById('2306.06968v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2306.06968v1-abstract-full" style="display: none;"> Forward Gradients - the idea of using directional derivatives in forward differentiation mode - have recently been shown to be utilizable for neural network training while avoiding problems generally associated with backpropagation gradient computation, such as locking and memorization requirements. The cost is the requirement to guess the step direction, which is hard in high dimensions. 
While current solutions rely on weighted averages over isotropic guess vector distributions, we propose to strongly bias our gradient guesses in directions that are much more promising, such as feedback obtained from small, local auxiliary networks. For a standard computer vision neural network, we conduct a rigorous study systematically covering a variety of combinations of gradient targets and gradient guesses, including those previously presented in the literature. We find that using gradients obtained from a local loss as a candidate direction drastically improves on random noise in Forward Gradient methods. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2306.06968v1-abstract-full').style.display = 'none'; document.getElementById('2306.06968v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 12 June, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> June 2023. 
</p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Journal ref:</span> Fortieth International Conference on Machine Learning, Jul 2023, Honolulu (Hawaii), USA, United States </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2305.07583">arXiv:2305.07583</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2305.07583">pdf</a>, <a href="https://arxiv.org/format/2305.07583">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Optimization and Control">math.OC</span> </div> </div> <p class="title is-5 mathjax"> MoMo: Momentum Models for Adaptive Learning Rates </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Schaipp%2C+F">Fabian Schaipp</a>, <a href="/search/cs?searchtype=author&amp;query=Ohana%2C+R">Ruben Ohana</a>, <a href="/search/cs?searchtype=author&amp;query=Eickenberg%2C+M">Michael Eickenberg</a>, <a href="/search/cs?searchtype=author&amp;query=Defazio%2C+A">Aaron Defazio</a>, <a href="/search/cs?searchtype=author&amp;query=Gower%2C+R+M">Robert M. Gower</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2305.07583v3-abstract-short" style="display: inline;"> Training a modern machine learning architecture on a new task requires extensive learning-rate tuning, which comes at a high computational cost. Here we develop new Polyak-type adaptive learning rates that can be used on top of any momentum method, and require less tuning to perform well. 
We first develop MoMo, a Momentum Model based adaptive learning rate for SGD-M (stochastic gradient descent wi&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2305.07583v3-abstract-full').style.display = 'inline'; document.getElementById('2305.07583v3-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2305.07583v3-abstract-full" style="display: none;"> Training a modern machine learning architecture on a new task requires extensive learning-rate tuning, which comes at a high computational cost. Here we develop new Polyak-type adaptive learning rates that can be used on top of any momentum method, and require less tuning to perform well. We first develop MoMo, a Momentum Model based adaptive learning rate for SGD-M (stochastic gradient descent with momentum). MoMo uses momentum estimates of the losses and gradients sampled at each iteration to build a model of the loss function. Our model makes use of any known lower bound of the loss function by using truncation, e.g. most losses are lower-bounded by zero. The model is then approximately minimized at each iteration to compute the next step. We show how MoMo can be used in combination with any momentum-based method, and showcase this by developing MoMo-Adam, which is Adam with our new model-based adaptive learning rate. We show that MoMo attains a $\mathcal{O}(1/\sqrt{K})$ convergence rate for convex problems with interpolation, needing knowledge of no problem-specific quantities other than the optimal value. Additionally, for losses with unknown lower bounds, we develop on-the-fly estimates of a lower bound, that are incorporated in our model. 
We show that MoMo and MoMo-Adam improve over SGD-M and Adam in terms of robustness to hyperparameter tuning for training image classifiers on MNIST, CIFAR, and Imagenet, for recommender systems on Criteo, for a transformer model on the translation task IWSLT14, and for a diffusion model. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2305.07583v3-abstract-full').style.display = 'none'; document.getElementById('2305.07583v3-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 5 June, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 12 May, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> May 2023. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">MSC Class:</span> 90C53; 74S60; 90C06; 62L20; 68W20; 15B52; 65Y20; 68W40 <span class="has-text-black-bis has-text-weight-semibold">ACM Class:</span> G.1.6 </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2301.07635">arXiv:2301.07635</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2301.07635">pdf</a>, <a href="https://arxiv.org/format/2301.07635">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Neural and Evolutionary Computing">cs.NE</span> </div> </div> <p class="title is-5 mathjax"> Local Learning with Neuron Groups </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Patel%2C+A">Adeetya Patel</a>, <a href="/search/cs?searchtype=author&amp;query=Eickenberg%2C+M">Michael Eickenberg</a>, <a 
href="/search/cs?searchtype=author&amp;query=Belilovsky%2C+E">Eugene Belilovsky</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2301.07635v1-abstract-short" style="display: inline;"> Traditional deep network training methods optimize a monolithic objective function jointly for all the components. This can lead to various inefficiencies in terms of potential parallelization. Local learning is an approach to model-parallelism that removes the standard end-to-end learning setup and utilizes local objective functions to permit parallel learning amongst model components in a deep n&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2301.07635v1-abstract-full').style.display = 'inline'; document.getElementById('2301.07635v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2301.07635v1-abstract-full" style="display: none;"> Traditional deep network training methods optimize a monolithic objective function jointly for all the components. This can lead to various inefficiencies in terms of potential parallelization. Local learning is an approach to model-parallelism that removes the standard end-to-end learning setup and utilizes local objective functions to permit parallel learning amongst model components in a deep network. Recent works have demonstrated that variants of local learning can lead to efficient training of modern deep networks. However, in terms of how much computation can be distributed, these approaches are typically limited by the number of layers in a network. In this work we propose to study how local learning can be applied at the level of splitting layers or modules into sub-components, adding a notion of width-wise modularity to the existing depth-wise modularity associated with local learning. 
We investigate local-learning penalties that permit such models to be trained efficiently. Our experiments on the CIFAR-10, CIFAR-100, and Imagenet32 datasets demonstrate that introducing width-level modularity can lead to computational advantages over existing methods based on local learning and opens new opportunities for improved model-parallel distributed training. Code is available at: https://github.com/adeetyapatel12/GN-DGL. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2301.07635v1-abstract-full').style.display = 'none'; document.getElementById('2301.07635v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 18 January, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> January 2023. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2207.08435">arXiv:2207.08435</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2207.08435">pdf</a>, <a href="https://arxiv.org/format/2207.08435">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Cosmology and Nongalactic Astrophysics">astro-ph.CO</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> <div class="is-inline-block" style="margin-left: 0.5rem"> <div class="tags has-addons"> <span class="tag is-dark is-size-7">doi</span> <span class="tag is-light is-size-7"><a class="" href="https://doi.org/10.1088/2632-2153/acbb53">10.1088/2632-2153/acbb53 <i class="fa fa-external-link" aria-hidden="true"></i></a></span> </div> </div> </div> <p class="title is-5 mathjax"> Robust Simulation-Based Inference in Cosmology with Bayesian Neural Networks </p> <p class="authors"> <span class="search-hit">Authors:</span> 
<a href="/search/cs?searchtype=author&amp;query=Lemos%2C+P">Pablo Lemos</a>, <a href="/search/cs?searchtype=author&amp;query=Cranmer%2C+M">Miles Cranmer</a>, <a href="/search/cs?searchtype=author&amp;query=Abidi%2C+M">Muntazir Abidi</a>, <a href="/search/cs?searchtype=author&amp;query=Hahn%2C+C">ChangHoon Hahn</a>, <a href="/search/cs?searchtype=author&amp;query=Eickenberg%2C+M">Michael Eickenberg</a>, <a href="/search/cs?searchtype=author&amp;query=Massara%2C+E">Elena Massara</a>, <a href="/search/cs?searchtype=author&amp;query=Yallup%2C+D">David Yallup</a>, <a href="/search/cs?searchtype=author&amp;query=Ho%2C+S">Shirley Ho</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2207.08435v3-abstract-short" style="display: inline;"> Simulation-based inference (SBI) is rapidly establishing itself as a standard machine learning technique for analyzing data in cosmological surveys. Despite continual improvements to the quality of density estimation by learned models, applications of such techniques to real data are entirely reliant on the generalization power of neural networks far outside the training distribution, which is mos&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2207.08435v3-abstract-full').style.display = 'inline'; document.getElementById('2207.08435v3-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2207.08435v3-abstract-full" style="display: none;"> Simulation-based inference (SBI) is rapidly establishing itself as a standard machine learning technique for analyzing data in cosmological surveys. 
Despite continual improvements to the quality of density estimation by learned models, applications of such techniques to real data are entirely reliant on the generalization power of neural networks far outside the training distribution, which is mostly unconstrained. Due to the imperfections in scientist-created simulations, and the large computational expense of generating all possible parameter combinations, SBI methods in cosmology are vulnerable to such generalization issues. Here, we discuss the effects of both issues, and show how using a Bayesian neural network framework for training SBI can mitigate biases, and result in more reliable inference outside the training set. We introduce cosmoSWAG, the first application of Stochastic Weight Averaging to cosmology, and apply it to SBI trained for inference on the cosmic microwave background. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2207.08435v3-abstract-full').style.display = 'none'; document.getElementById('2207.08435v3-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 2 March, 2023; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 18 July, 2022; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> July 2022. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">5 pages, 3 figures. Preliminary version accepted at the ML4Astro Machine Learning for Astrophysics Workshop at the Thirty-ninth International Conference on Machine Learning (ICML 2022). Final version published at Machine Learning: Science and Technology</span> </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Journal ref:</span> Mach. Learn.: Sci. Technol. 
4 01LT01 (2023) </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2201.01300">arXiv:2201.01300</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2201.01300">pdf</a>, <a href="https://arxiv.org/format/2201.01300">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Cosmology and Nongalactic Astrophysics">astro-ph.CO</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Astrophysics of Galaxies">astro-ph.GA</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Instrumentation and Methods for Astrophysics">astro-ph.IM</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> <div class="is-inline-block" style="margin-left: 0.5rem"> <div class="tags has-addons"> <span class="tag is-dark is-size-7">doi</span> <span class="tag is-light is-size-7"><a class="" href="https://doi.org/10.3847/1538-4365/acbf47">10.3847/1538-4365/acbf47 <i class="fa fa-external-link" aria-hidden="true"></i></a></span> </div> </div> </div> <p class="title is-5 mathjax"> The CAMELS project: public data release </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Villaescusa-Navarro%2C+F">Francisco Villaescusa-Navarro</a>, <a href="/search/cs?searchtype=author&amp;query=Genel%2C+S">Shy Genel</a>, <a href="/search/cs?searchtype=author&amp;query=Angl%C3%A9s-Alc%C3%A1zar%2C+D">Daniel Anglés-Alcázar</a>, <a href="/search/cs?searchtype=author&amp;query=Perez%2C+L+A">Lucia A. 
Perez</a>, <a href="/search/cs?searchtype=author&amp;query=Villanueva-Domingo%2C+P">Pablo Villanueva-Domingo</a>, <a href="/search/cs?searchtype=author&amp;query=Wadekar%2C+D">Digvijay Wadekar</a>, <a href="/search/cs?searchtype=author&amp;query=Shao%2C+H">Helen Shao</a>, <a href="/search/cs?searchtype=author&amp;query=Mohammad%2C+F+G">Faizan G. Mohammad</a>, <a href="/search/cs?searchtype=author&amp;query=Hassan%2C+S">Sultan Hassan</a>, <a href="/search/cs?searchtype=author&amp;query=Moser%2C+E">Emily Moser</a>, <a href="/search/cs?searchtype=author&amp;query=Lau%2C+E+T">Erwin T. Lau</a>, <a href="/search/cs?searchtype=author&amp;query=Valle%2C+L+F+M+P">Luis Fernando Machado Poletti Valle</a>, <a href="/search/cs?searchtype=author&amp;query=Nicola%2C+A">Andrina Nicola</a>, <a href="/search/cs?searchtype=author&amp;query=Thiele%2C+L">Leander Thiele</a>, <a href="/search/cs?searchtype=author&amp;query=Jo%2C+Y">Yongseok Jo</a>, <a href="/search/cs?searchtype=author&amp;query=Philcox%2C+O+H+E">Oliver H. E. Philcox</a>, <a href="/search/cs?searchtype=author&amp;query=Oppenheimer%2C+B+D">Benjamin D. Oppenheimer</a>, <a href="/search/cs?searchtype=author&amp;query=Tillman%2C+M">Megan Tillman</a>, <a href="/search/cs?searchtype=author&amp;query=Hahn%2C+C">ChangHoon Hahn</a>, <a href="/search/cs?searchtype=author&amp;query=Kaushal%2C+N">Neerav Kaushal</a>, <a href="/search/cs?searchtype=author&amp;query=Pisani%2C+A">Alice Pisani</a>, <a href="/search/cs?searchtype=author&amp;query=Gebhardt%2C+M">Matthew Gebhardt</a>, <a href="/search/cs?searchtype=author&amp;query=Delgado%2C+A+M">Ana Maria Delgado</a>, <a href="/search/cs?searchtype=author&amp;query=Caliendo%2C+J">Joyce Caliendo</a>, <a href="/search/cs?searchtype=author&amp;query=Kreisch%2C+C">Christina Kreisch</a> , et al. 
(22 additional authors not shown) </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2201.01300v1-abstract-short" style="display: inline;"> The Cosmology and Astrophysics with MachinE Learning Simulations (CAMELS) project was developed to combine cosmology with astrophysics through thousands of cosmological hydrodynamic simulations and machine learning. CAMELS contains 4,233 cosmological simulations, 2,049 N-body and 2,184 state-of-the-art hydrodynamic simulations that sample a vast volume in parameter space. In this paper we present&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2201.01300v1-abstract-full').style.display = 'inline'; document.getElementById('2201.01300v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2201.01300v1-abstract-full" style="display: none;"> The Cosmology and Astrophysics with MachinE Learning Simulations (CAMELS) project was developed to combine cosmology with astrophysics through thousands of cosmological hydrodynamic simulations and machine learning. CAMELS contains 4,233 cosmological simulations, 2,049 N-body and 2,184 state-of-the-art hydrodynamic simulations that sample a vast volume in parameter space. In this paper we present the CAMELS public data release, describing the characteristics of the CAMELS simulations and a variety of data products generated from them, including halo, subhalo, galaxy, and void catalogues, power spectra, bispectra, Lyman-$α$ spectra, probability distribution functions, halo radial profiles, and X-rays photon lists. We also release over one thousand catalogues that contain billions of galaxies from CAMELS-SAM: a large collection of N-body simulations that have been combined with the Santa Cruz Semi-Analytic Model. 
We release all the data, comprising more than 350 terabytes and containing 143,922 snapshots, millions of halos, galaxies and summary statistics. We provide further technical details on how to access, download, read, and process the data at \url{https://camels.readthedocs.io}. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2201.01300v1-abstract-full').style.display = 'none'; document.getElementById('2201.01300v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 4 January, 2022; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> January 2022. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">18 pages, 3 figures. More than 350 Tb of data from thousands of simulations publicly available at https://www.camel-simulations.org</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2109.10915">arXiv:2109.10915</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2109.10915">pdf</a>, <a href="https://arxiv.org/format/2109.10915">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Cosmology and Nongalactic Astrophysics">astro-ph.CO</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Astrophysics of Galaxies">astro-ph.GA</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Instrumentation and Methods for Astrophysics">astro-ph.IM</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> <div 
class="is-inline-block" style="margin-left: 0.5rem"> <div class="tags has-addons"> <span class="tag is-dark is-size-7">doi</span> <span class="tag is-light is-size-7"><a class="" href="https://doi.org/10.3847/1538-4365/ac5ab0">10.3847/1538-4365/ac5ab0 <i class="fa fa-external-link" aria-hidden="true"></i></a></span> </div> </div> </div> <p class="title is-5 mathjax"> The CAMELS Multifield Dataset: Learning the Universe&#39;s Fundamental Parameters with Artificial Intelligence </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Villaescusa-Navarro%2C+F">Francisco Villaescusa-Navarro</a>, <a href="/search/cs?searchtype=author&amp;query=Genel%2C+S">Shy Genel</a>, <a href="/search/cs?searchtype=author&amp;query=Angles-Alcazar%2C+D">Daniel Angles-Alcazar</a>, <a href="/search/cs?searchtype=author&amp;query=Thiele%2C+L">Leander Thiele</a>, <a href="/search/cs?searchtype=author&amp;query=Dave%2C+R">Romeel Dave</a>, <a href="/search/cs?searchtype=author&amp;query=Narayanan%2C+D">Desika Narayanan</a>, <a href="/search/cs?searchtype=author&amp;query=Nicola%2C+A">Andrina Nicola</a>, <a href="/search/cs?searchtype=author&amp;query=Li%2C+Y">Yin Li</a>, <a href="/search/cs?searchtype=author&amp;query=Villanueva-Domingo%2C+P">Pablo Villanueva-Domingo</a>, <a href="/search/cs?searchtype=author&amp;query=Wandelt%2C+B">Benjamin Wandelt</a>, <a href="/search/cs?searchtype=author&amp;query=Spergel%2C+D+N">David N. Spergel</a>, <a href="/search/cs?searchtype=author&amp;query=Somerville%2C+R+S">Rachel S. Somerville</a>, <a href="/search/cs?searchtype=author&amp;query=Matilla%2C+J+M+Z">Jose Manuel Zorrilla Matilla</a>, <a href="/search/cs?searchtype=author&amp;query=Mohammad%2C+F+G">Faizan G. 
Mohammad</a>, <a href="/search/cs?searchtype=author&amp;query=Hassan%2C+S">Sultan Hassan</a>, <a href="/search/cs?searchtype=author&amp;query=Shao%2C+H">Helen Shao</a>, <a href="/search/cs?searchtype=author&amp;query=Wadekar%2C+D">Digvijay Wadekar</a>, <a href="/search/cs?searchtype=author&amp;query=Eickenberg%2C+M">Michael Eickenberg</a>, <a href="/search/cs?searchtype=author&amp;query=Wong%2C+K+W+K">Kaze W. K. Wong</a>, <a href="/search/cs?searchtype=author&amp;query=Contardo%2C+G">Gabriella Contardo</a>, <a href="/search/cs?searchtype=author&amp;query=Jo%2C+Y">Yongseok Jo</a>, <a href="/search/cs?searchtype=author&amp;query=Moser%2C+E">Emily Moser</a>, <a href="/search/cs?searchtype=author&amp;query=Lau%2C+E+T">Erwin T. Lau</a>, <a href="/search/cs?searchtype=author&amp;query=Valle%2C+L+F+M+P">Luis Fernando Machado Poletti Valle</a>, <a href="/search/cs?searchtype=author&amp;query=Perez%2C+L+A">Lucia A. Perez</a> , et al. (3 additional authors not shown) </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2109.10915v1-abstract-short" style="display: inline;"> We present the Cosmology and Astrophysics with MachinE Learning Simulations (CAMELS) Multifield Dataset, CMD, a collection of hundreds of thousands of 2D maps and 3D grids containing many different properties of cosmic gas, dark matter, and stars from 2,000 distinct simulated universes at several cosmic times. 
The 2D maps and 3D grids represent cosmic regions that span $\sim$100 million light year&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2109.10915v1-abstract-full').style.display = 'inline'; document.getElementById('2109.10915v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2109.10915v1-abstract-full" style="display: none;"> We present the Cosmology and Astrophysics with MachinE Learning Simulations (CAMELS) Multifield Dataset, CMD, a collection of hundreds of thousands of 2D maps and 3D grids containing many different properties of cosmic gas, dark matter, and stars from 2,000 distinct simulated universes at several cosmic times. The 2D maps and 3D grids represent cosmic regions that span $\sim$100 million light years and have been generated from thousands of state-of-the-art hydrodynamic and gravity-only N-body simulations from the CAMELS project. Designed to train machine learning models, CMD is the largest dataset of its kind containing more than 70 Terabytes of data. In this paper we describe CMD in detail and outline a few of its applications. We focus our attention on one such task, parameter inference, formulating the problems we face as a challenge to the community. We release all data and provide further technical details at https://camels-multifield-dataset.readthedocs.io. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2109.10915v1-abstract-full').style.display = 'none'; document.getElementById('2109.10915v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 22 September, 2021; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> September 2021. 
</p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">17 pages, 1 figure. Third paper of a series of four. Hundreds of thousands of labeled 2D maps and 3D grids from thousands of simulated universes publicly available at https://camels-multifield-dataset.readthedocs.io</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2107.09539">arXiv:2107.09539</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2107.09539">pdf</a>, <a href="https://arxiv.org/format/2107.09539">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Signal Processing">eess.SP</span> </div> </div> <p class="title is-5 mathjax"> Parametric Scattering Networks </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Gauthier%2C+S">Shanel Gauthier</a>, <a href="/search/cs?searchtype=author&amp;query=Th%C3%A9rien%2C+B">Benjamin Thérien</a>, <a href="/search/cs?searchtype=author&amp;query=Als%C3%A8ne-Racicot%2C+L">Laurent Alsène-Racicot</a>, <a href="/search/cs?searchtype=author&amp;query=Chaudhary%2C+M">Muawiz Chaudhary</a>, <a href="/search/cs?searchtype=author&amp;query=Rish%2C+I">Irina Rish</a>, <a href="/search/cs?searchtype=author&amp;query=Belilovsky%2C+E">Eugene Belilovsky</a>, <a href="/search/cs?searchtype=author&amp;query=Eickenberg%2C+M">Michael Eickenberg</a>, <a href="/search/cs?searchtype=author&amp;query=Wolf%2C+G">Guy Wolf</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2107.09539v4-abstract-short" style="display: inline;"> The wavelet scattering 
transform creates geometric invariants and deformation stability. In multiple signal domains, it has been shown to yield more discriminative representations compared to other non-learned representations and to outperform learned representations in certain tasks, particularly on limited labeled data and highly structured signals. The wavelet filters used in the scattering tra&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2107.09539v4-abstract-full').style.display = 'inline'; document.getElementById('2107.09539v4-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2107.09539v4-abstract-full" style="display: none;"> The wavelet scattering transform creates geometric invariants and deformation stability. In multiple signal domains, it has been shown to yield more discriminative representations compared to other non-learned representations and to outperform learned representations in certain tasks, particularly on limited labeled data and highly structured signals. The wavelet filters used in the scattering transform are typically selected to create a tight frame via a parameterized mother wavelet. In this work, we investigate whether this standard wavelet filterbank construction is optimal. Focusing on Morlet wavelets, we propose to learn the scales, orientations, and aspect ratios of the filters to produce problem-specific parameterizations of the scattering transform. We show that our learned versions of the scattering transform yield significant performance gains in small-sample classification settings over the standard scattering transform. Moreover, our empirical results suggest that traditional filterbank constructions may not always be necessary for scattering transforms to extract effective representations. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2107.09539v4-abstract-full').style.display = 'none'; document.getElementById('2107.09539v4-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 15 August, 2022; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 20 July, 2021; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> July 2021. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">ACM Class:</span> F.2.2; I.2.7 </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2106.06401">arXiv:2106.06401</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2106.06401">pdf</a>, <a href="https://arxiv.org/format/2106.06401">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Distributed, Parallel, and Cluster Computing">cs.DC</span> </div> </div> <p class="title is-5 mathjax"> Decoupled Greedy Learning of CNNs for Synchronous and Asynchronous Distributed Learning </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Belilovsky%2C+E">Eugene Belilovsky</a>, <a href="/search/cs?searchtype=author&amp;query=Leconte%2C+L">Louis Leconte</a>, <a href="/search/cs?searchtype=author&amp;query=Caccia%2C+L">Lucas Caccia</a>, <a href="/search/cs?searchtype=author&amp;query=Eickenberg%2C+M">Michael Eickenberg</a>, <a href="/search/cs?searchtype=author&amp;query=Oyallon%2C+E">Edouard Oyallon</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span 
class="abstract-short has-text-grey-dark mathjax" id="2106.06401v1-abstract-short" style="display: inline;"> A commonly cited inefficiency of neural network training using back-propagation is the update locking problem: each layer must wait for the signal to propagate through the full network before updating. Several alternatives that can alleviate this issue have been proposed. In this context, we consider a simple alternative based on minimal feedback, which we call Decoupled Greedy Learning (DGL). It&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2106.06401v1-abstract-full').style.display = 'inline'; document.getElementById('2106.06401v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2106.06401v1-abstract-full" style="display: none;"> A commonly cited inefficiency of neural network training using back-propagation is the update locking problem: each layer must wait for the signal to propagate through the full network before updating. Several alternatives that can alleviate this issue have been proposed. In this context, we consider a simple alternative based on minimal feedback, which we call Decoupled Greedy Learning (DGL). It is based on a classic greedy relaxation of the joint training objective, recently shown to be effective in the context of Convolutional Neural Networks (CNNs) on large-scale image classification. We consider an optimization of this objective that permits us to decouple the layer training, allowing for layers or modules in networks to be trained with a potentially linear parallelization. With the use of a replay buffer we show that this approach can be extended to asynchronous settings, where modules can operate and continue to update with possibly large communication delays. To address bandwidth and memory issues we propose an approach based on online vector quantization. 
This allows to drastically reduce the communication bandwidth between modules and required memory for replay buffers. We show theoretically and empirically that this approach converges and compare it to the sequential solvers. We demonstrate the effectiveness of DGL against alternative approaches on the CIFAR-10 dataset and on the large-scale ImageNet dataset. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2106.06401v1-abstract-full').style.display = 'none'; document.getElementById('2106.06401v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 11 June, 2021; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> June 2021. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">arXiv admin note: substantial text overlap with arXiv:1901.08164</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2012.07386">arXiv:2012.07386</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2012.07386">pdf</a>, <a href="https://arxiv.org/format/2012.07386">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Optics">physics.optics</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">stat.ML</span> </div> </div> <p class="title is-5 mathjax"> Phase Retrieval with Holography and Untrained Priors: Tackling the Challenges of Low-Photon Nanoscale Imaging </p> <p 
class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Lawrence%2C+H">Hannah Lawrence</a>, <a href="/search/cs?searchtype=author&amp;query=Barmherzig%2C+D+A">David A. Barmherzig</a>, <a href="/search/cs?searchtype=author&amp;query=Li%2C+H">Henry Li</a>, <a href="/search/cs?searchtype=author&amp;query=Eickenberg%2C+M">Michael Eickenberg</a>, <a href="/search/cs?searchtype=author&amp;query=Gabri%C3%A9%2C+M">Marylou Gabrié</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2012.07386v3-abstract-short" style="display: inline;"> Phase retrieval is the inverse problem of recovering a signal from magnitude-only Fourier measurements, and underlies numerous imaging modalities, such as Coherent Diffraction Imaging (CDI). A variant of this setup, known as holography, includes a reference object that is placed adjacent to the specimen of interest before measurements are collected. The resulting inverse problem, known as holograp&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2012.07386v3-abstract-full').style.display = 'inline'; document.getElementById('2012.07386v3-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2012.07386v3-abstract-full" style="display: none;"> Phase retrieval is the inverse problem of recovering a signal from magnitude-only Fourier measurements, and underlies numerous imaging modalities, such as Coherent Diffraction Imaging (CDI). A variant of this setup, known as holography, includes a reference object that is placed adjacent to the specimen of interest before measurements are collected. The resulting inverse problem, known as holographic phase retrieval, is well-known to have improved problem conditioning relative to the original. This innovation, i.e. 
Holographic CDI, becomes crucial at the nanoscale, where imaging specimens such as viruses, proteins, and crystals require low-photon measurements. This data is highly corrupted by Poisson shot noise, and often lacks low-frequency content as well. In this work, we introduce a dataset-free deep learning framework for holographic phase retrieval adapted to these challenges. The key ingredients of our approach are the explicit and flexible incorporation of the physical forward model into an automatic differentiation procedure, the Poisson log-likelihood objective function, and an optional untrained deep image prior. We perform extensive evaluation under realistic conditions. Compared to competing classical methods, our method recovers signal from higher noise levels and is more resilient to suboptimal reference design, as well as to large missing regions of low frequencies in the observations. Finally, we show that these properties carry over to experimental data acquired on optical wavelengths. To the best of our knowledge, this is the first work to consider a dataset-free machine learning approach for holographic phase retrieval. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2012.07386v3-abstract-full').style.display = 'none'; document.getElementById('2012.07386v3-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 20 April, 2021; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 14 December, 2020; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> December 2020. 
</p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/1901.08164">arXiv:1901.08164</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/1901.08164">pdf</a>, <a href="https://arxiv.org/format/1901.08164">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">stat.ML</span> </div> </div> <p class="title is-5 mathjax"> Decoupled Greedy Learning of CNNs </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Belilovsky%2C+E">Eugene Belilovsky</a>, <a href="/search/cs?searchtype=author&amp;query=Eickenberg%2C+M">Michael Eickenberg</a>, <a href="/search/cs?searchtype=author&amp;query=Oyallon%2C+E">Edouard Oyallon</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="1901.08164v4-abstract-short" style="display: inline;"> A commonly cited inefficiency of neural network training by back-propagation is the update locking problem: each layer must wait for the signal to propagate through the full network before updating. Several alternatives that can alleviate this issue have been proposed. 
In this context, we consider a simpler, but more effective, substitute that uses minimal feedback, which we call Decoupled Greedy&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('1901.08164v4-abstract-full').style.display = 'inline'; document.getElementById('1901.08164v4-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="1901.08164v4-abstract-full" style="display: none;"> A commonly cited inefficiency of neural network training by back-propagation is the update locking problem: each layer must wait for the signal to propagate through the full network before updating. Several alternatives that can alleviate this issue have been proposed. In this context, we consider a simpler, but more effective, substitute that uses minimal feedback, which we call Decoupled Greedy Learning (DGL). It is based on a greedy relaxation of the joint training objective, recently shown to be effective in the context of Convolutional Neural Networks (CNNs) on large-scale image classification. We consider an optimization of this objective that permits us to decouple the layer training, allowing for layers or modules in networks to be trained with a potentially linear parallelization in layers. With the use of a replay buffer we show this approach can be extended to asynchronous settings, where modules can operate with possibly large communication delays. We show theoretically and empirically that this approach converges. Then, we empirically find that it can lead to better generalization than sequential greedy optimization. We demonstrate the effectiveness of DGL against alternative approaches on the CIFAR-10 dataset and on the large-scale ImageNet dataset. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('1901.08164v4-abstract-full').style.display = 'none'; document.getElementById('1901.08164v4-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 19 June, 2020; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 23 January, 2019; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> January 2019. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/1812.11446">arXiv:1812.11446</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/1812.11446">pdf</a>, <a href="https://arxiv.org/format/1812.11446">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">stat.ML</span> </div> </div> <p class="title is-5 mathjax"> Greedy Layerwise Learning Can Scale to ImageNet </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Belilovsky%2C+E">Eugene Belilovsky</a>, <a href="/search/cs?searchtype=author&amp;query=Eickenberg%2C+M">Michael Eickenberg</a>, <a href="/search/cs?searchtype=author&amp;query=Oyallon%2C+E">Edouard Oyallon</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="1812.11446v3-abstract-short" style="display: inline;"> Shallow supervised 1-hidden layer neural networks have a number of favorable properties that make them easier to interpret, analyze, and optimize than their deep counterparts, but lack their representational power. 
Here we use 1-hidden layer learning problems to sequentially build deep networks layer by layer, which can inherit properties from shallow networks. Contrary to previous approaches usin&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('1812.11446v3-abstract-full').style.display = 'inline'; document.getElementById('1812.11446v3-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="1812.11446v3-abstract-full" style="display: none;"> Shallow supervised 1-hidden layer neural networks have a number of favorable properties that make them easier to interpret, analyze, and optimize than their deep counterparts, but lack their representational power. Here we use 1-hidden layer learning problems to sequentially build deep networks layer by layer, which can inherit properties from shallow networks. Contrary to previous approaches using shallow networks, we focus on problems where deep learning is reported as critical for success. We thus study CNNs on image classification tasks using the large-scale ImageNet dataset and the CIFAR-10 dataset. Using a simple set of ideas for architecture and training we find that solving sequential 1-hidden-layer auxiliary problems lead to a CNN that exceeds AlexNet performance on ImageNet. Extending this training methodology to construct individual layers by solving 2-and-3-hidden layer auxiliary problems, we obtain an 11-layer network that exceeds several members of the VGG model family on ImageNet, and can train a VGG-11 model to the same accuracy as end-to-end learning. To our knowledge, this is the first competitive alternative to end-to-end training of CNNs that can scale to ImageNet. We illustrate several interesting properties of these models theoretically and conduct a range of experiments to study the properties this training induces on the intermediate layers. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('1812.11446v3-abstract-full').style.display = 'none'; document.getElementById('1812.11446v3-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 23 April, 2019; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 29 December, 2018; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> December 2018. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/1812.11214">arXiv:1812.11214</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/1812.11214">pdf</a>, <a href="https://arxiv.org/ps/1812.11214">ps</a>, <a href="https://arxiv.org/format/1812.11214">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Sound">cs.SD</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Audio and Speech Processing">eess.AS</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">stat.ML</span> </div> </div> <p class="title is-5 mathjax"> Kymatio: Scattering Transforms in Python </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Andreux%2C+M">Mathieu Andreux</a>, <a href="/search/cs?searchtype=author&amp;query=Angles%2C+T">Tomás Angles</a>, <a href="/search/cs?searchtype=author&amp;query=Exarchakis%2C+G">Georgios Exarchakis</a>, <a href="/search/cs?searchtype=author&amp;query=Leonarduzzi%2C+R">Roberto Leonarduzzi</a>, <a 
href="/search/cs?searchtype=author&amp;query=Rochette%2C+G">Gaspar Rochette</a>, <a href="/search/cs?searchtype=author&amp;query=Thiry%2C+L">Louis Thiry</a>, <a href="/search/cs?searchtype=author&amp;query=Zarka%2C+J">John Zarka</a>, <a href="/search/cs?searchtype=author&amp;query=Mallat%2C+S">Stéphane Mallat</a>, <a href="/search/cs?searchtype=author&amp;query=and%C3%A9n%2C+J">Joakim Andén</a>, <a href="/search/cs?searchtype=author&amp;query=Belilovsky%2C+E">Eugene Belilovsky</a>, <a href="/search/cs?searchtype=author&amp;query=Bruna%2C+J">Joan Bruna</a>, <a href="/search/cs?searchtype=author&amp;query=Lostanlen%2C+V">Vincent Lostanlen</a>, <a href="/search/cs?searchtype=author&amp;query=Chaudhary%2C+M">Muawiz Chaudhary</a>, <a href="/search/cs?searchtype=author&amp;query=Hirn%2C+M+J">Matthew J. Hirn</a>, <a href="/search/cs?searchtype=author&amp;query=Oyallon%2C+E">Edouard Oyallon</a>, <a href="/search/cs?searchtype=author&amp;query=Zhang%2C+S">Sixin Zhang</a>, <a href="/search/cs?searchtype=author&amp;query=Cella%2C+C">Carmine Cella</a>, <a href="/search/cs?searchtype=author&amp;query=Eickenberg%2C+M">Michael Eickenberg</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="1812.11214v3-abstract-short" style="display: inline;"> The wavelet scattering transform is an invariant signal representation suitable for many signal processing and machine learning applications. We present the Kymatio software package, an easy-to-use, high-performance Python implementation of the scattering transform in 1D, 2D, and 3D that is compatible with modern deep learning frameworks. 
All transforms may be executed on a GPU (in addition to CPU&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('1812.11214v3-abstract-full').style.display = 'inline'; document.getElementById('1812.11214v3-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="1812.11214v3-abstract-full" style="display: none;"> The wavelet scattering transform is an invariant signal representation suitable for many signal processing and machine learning applications. We present the Kymatio software package, an easy-to-use, high-performance Python implementation of the scattering transform in 1D, 2D, and 3D that is compatible with modern deep learning frameworks. All transforms may be executed on a GPU (in addition to CPU), offering a considerable speed up over CPU implementations. The package also has a small memory footprint, resulting in efficient memory usage. The source code, documentation, and examples are available under a BSD license at https://www.kymat.io/ <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('1812.11214v3-abstract-full').style.display = 'none'; document.getElementById('1812.11214v3-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 31 May, 2022; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 28 December, 2018; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> December 2018. 
</p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/1805.00571">arXiv:1805.00571</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/1805.00571">pdf</a>, <a href="https://arxiv.org/format/1805.00571">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Chemical Physics">physics.chem-ph</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computational Engineering, Finance, and Science">cs.CE</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">stat.ML</span> </div> <div class="is-inline-block" style="margin-left: 0.5rem"> <div class="tags has-addons"> <span class="tag is-dark is-size-7">doi</span> <span class="tag is-light is-size-7"><a class="" href="https://doi.org/10.1063/1.5023798">10.1063/1.5023798 <i class="fa fa-external-link" aria-hidden="true"></i></a></span> </div> </div> </div> <p class="title is-5 mathjax"> Solid Harmonic Wavelet Scattering for Predictions of Molecule Properties </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Eickenberg%2C+M">Michael Eickenberg</a>, <a href="/search/cs?searchtype=author&amp;query=Exarchakis%2C+G">Georgios Exarchakis</a>, <a href="/search/cs?searchtype=author&amp;query=Hirn%2C+M">Matthew Hirn</a>, <a href="/search/cs?searchtype=author&amp;query=Mallat%2C+S">Stéphane Mallat</a>, <a href="/search/cs?searchtype=author&amp;query=Thiry%2C+L">Louis Thiry</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="1805.00571v1-abstract-short" style="display: inline;"> We present a machine learning algorithm for the 
prediction of molecule properties inspired by ideas from density functional theory. Using Gaussian-type orbital functions, we create surrogate electronic densities of the molecule from which we compute invariant &#34;solid harmonic scattering coefficients&#34; that account for different types of interactions at different scales. Multi-linear regressions of v&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('1805.00571v1-abstract-full').style.display = 'inline'; document.getElementById('1805.00571v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="1805.00571v1-abstract-full" style="display: none;"> We present a machine learning algorithm for the prediction of molecule properties inspired by ideas from density functional theory. Using Gaussian-type orbital functions, we create surrogate electronic densities of the molecule from which we compute invariant &#34;solid harmonic scattering coefficients&#34; that account for different types of interactions at different scales. Multi-linear regressions of various physical properties of molecules are computed from these invariant coefficients. Numerical experiments show that these regressions have near state of the art performance, even with relatively few training examples. Predictions over small sets of scattering coefficients can reach a DFT precision while being interpretable. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('1805.00571v1-abstract-full').style.display = 'none'; document.getElementById('1805.00571v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 1 May, 2018; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> May 2018. 
</p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Keywords: wavelets, electronic structure calculations, solid harmonics, invariants, multilinear regression</span> </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Journal ref:</span> J. Chem. Phys. 148, 241732 (2018) </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/1512.06999">arXiv:1512.06999</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/1512.06999">pdf</a>, <a href="https://arxiv.org/ps/1512.06999">ps</a>, <a href="https://arxiv.org/format/1512.06999">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Neurons and Cognition">q-bio.NC</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computation">stat.CO</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">stat.ML</span> </div> </div> <p class="title is-5 mathjax"> FAASTA: A fast solver for total-variation regularization of ill-conditioned problems with application to brain imaging </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Varoquaux%2C+G">Gaël Varoquaux</a>, <a href="/search/cs?searchtype=author&amp;query=Eickenberg%2C+M">Michael Eickenberg</a>, <a href="/search/cs?searchtype=author&amp;query=Dohmatob%2C+E">Elvis Dohmatob</a>, <a href="/search/cs?searchtype=author&amp;query=Thirion%2C+B">Bertrand Thirion</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="1512.06999v1-abstract-short" 
style="display: inline;"> The total variation (TV) penalty, as many other analysis-sparsity problems, does not lead to separable factors or a proximal operator with a closed-form expression, such as soft thresholding for the $\ell\_1$ penalty. As a result, in a variational formulation of an inverse problem or statistical learning estimation, it leads to challenging non-smooth optimization problems that are often solved with e&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('1512.06999v1-abstract-full').style.display = 'inline'; document.getElementById('1512.06999v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="1512.06999v1-abstract-full" style="display: none;"> The total variation (TV) penalty, as many other analysis-sparsity problems, does not lead to separable factors or a proximal operator with a closed-form expression, such as soft thresholding for the $\ell\_1$ penalty. As a result, in a variational formulation of an inverse problem or statistical learning estimation, it leads to challenging non-smooth optimization problems that are often solved with elaborate single-step first-order methods. When the data-fit term arises from empirical measurements, as in brain imaging, it is often very ill-conditioned and without simple structure. In this situation, in proximal splitting methods, the computation cost of the gradient step can easily dominate each iteration. Thus it is beneficial to minimize the number of gradient steps. We present fAASTA, a variant of FISTA, that relies on an internal solver for the TV proximal operator, and refines its tolerance to balance computational cost of the gradient and the proximal steps. We give benchmarks and illustrations on &#34;brain decoding&#34;: recovering brain maps from noisy measurements to predict observed behavior. 
The algorithm as well as the empirical study of convergence speed are valuable for any non-exact proximal operator, in particular analysis-sparsity problems. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('1512.06999v1-abstract-full').style.display = 'none'; document.getElementById('1512.06999v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 22 December, 2015; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> December 2015. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Journal ref:</span> Colloque GRETSI, Sep 2015, Lyon, France. Gretsi, 2015, http://www.gretsi.fr/colloque2015/myGretsi/programme.php </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/1412.3919">arXiv:1412.3919</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/1412.3919">pdf</a>, <a href="https://arxiv.org/format/1412.3919">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">stat.ML</span> </div> </div> <p class="title is-5 mathjax"> Machine Learning for Neuroimaging with Scikit-Learn </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Abraham%2C+A">Alexandre Abraham</a>, <a href="/search/cs?searchtype=author&amp;query=Pedregosa%2C+F">Fabian Pedregosa</a>, <a href="/search/cs?searchtype=author&amp;query=Eickenberg%2C+M">Michael Eickenberg</a>, <a 
href="/search/cs?searchtype=author&amp;query=Gervais%2C+P">Philippe Gervais</a>, <a href="/search/cs?searchtype=author&amp;query=Muller%2C+A">Andreas Muller</a>, <a href="/search/cs?searchtype=author&amp;query=Kossaifi%2C+J">Jean Kossaifi</a>, <a href="/search/cs?searchtype=author&amp;query=Gramfort%2C+A">Alexandre Gramfort</a>, <a href="/search/cs?searchtype=author&amp;query=Thirion%2C+B">Bertrand Thirion</a>, <a href="/search/cs?searchtype=author&amp;query=Varoquaux%2C+G">Gaël Varoquaux</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="1412.3919v1-abstract-short" style="display: inline;"> Statistical machine learning methods are increasingly used for neuroimaging data analysis. Their main virtue is their ability to model high-dimensional datasets, e.g. multivariate analysis of activation images or resting-state time series. Supervised learning is typically used in decoding or encoding settings to relate brain images to behavioral or clinical observations, while unsupervised learnin&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('1412.3919v1-abstract-full').style.display = 'inline'; document.getElementById('1412.3919v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="1412.3919v1-abstract-full" style="display: none;"> Statistical machine learning methods are increasingly used for neuroimaging data analysis. Their main virtue is their ability to model high-dimensional datasets, e.g. multivariate analysis of activation images or resting-state time series. Supervised learning is typically used in decoding or encoding settings to relate brain images to behavioral or clinical observations, while unsupervised learning can uncover hidden structures in sets of images (e.g. 
resting state functional MRI) or find sub-populations in large cohorts. By considering different functional neuroimaging applications, we illustrate how scikit-learn, a Python machine learning library, can be used to perform some key analysis steps. Scikit-learn contains a very large set of statistical learning algorithms, both supervised and unsupervised, and its application to neuroimaging data provides a versatile tool to study the brain. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('1412.3919v1-abstract-full').style.display = 'none'; document.getElementById('1412.3919v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 12 December, 2014; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> December 2014. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Frontiers in neuroscience, Frontiers Research Foundation, 2013, pp.15</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/1402.7015">arXiv:1402.7015</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/1402.7015">pdf</a>, <a href="https://arxiv.org/format/1402.7015">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computational Engineering, Finance, and Science">cs.CE</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> <div class="is-inline-block" style="margin-left: 0.5rem"> <div class="tags has-addons"> <span class="tag is-dark is-size-7">doi</span> <span class="tag is-light is-size-7"><a class="" href="https://doi.org/10.1016/j.neuroimage.2014.09.060">10.1016/j.neuroimage.2014.09.060 <i 
class="fa fa-external-link" aria-hidden="true"></i></a></span> </div> </div> </div> <p class="title is-5 mathjax"> Data-driven HRF estimation for encoding and decoding models </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Pedregosa%2C+F">Fabian Pedregosa</a>, <a href="/search/cs?searchtype=author&amp;query=Eickenberg%2C+M">Michael Eickenberg</a>, <a href="/search/cs?searchtype=author&amp;query=Ciuciu%2C+P">Philippe Ciuciu</a>, <a href="/search/cs?searchtype=author&amp;query=Thirion%2C+B">Bertrand Thirion</a>, <a href="/search/cs?searchtype=author&amp;query=Gramfort%2C+A">Alexandre Gramfort</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="1402.7015v6-abstract-short" style="display: inline;"> Despite the common usage of a canonical, data-independent, hemodynamic response function (HRF), it is known that the shape of the HRF varies across brain regions and subjects. This suggests that a data-driven estimation of this function could lead to more statistical power when modeling BOLD fMRI data. However, unconstrained estimation of the HRF can yield highly unstable results when the number o&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('1402.7015v6-abstract-full').style.display = 'inline'; document.getElementById('1402.7015v6-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="1402.7015v6-abstract-full" style="display: none;"> Despite the common usage of a canonical, data-independent, hemodynamic response function (HRF), it is known that the shape of the HRF varies across brain regions and subjects. This suggests that a data-driven estimation of this function could lead to more statistical power when modeling BOLD fMRI data. 
However, unconstrained estimation of the HRF can yield highly unstable results when the number of free parameters is large. We develop a method for the joint estimation of activation and HRF using a rank constraint causing the estimated HRF to be equal across events/conditions, yet permitting it to be different across voxels. Model estimation leads to an optimization problem that we propose to solve with an efficient quasi-Newton method exploiting fast gradient computations. This model, called GLM with Rank-1 constraint (R1-GLM), can be extended to the setting of GLM with separate designs which has been shown to improve decoding accuracy in brain activity decoding experiments. We compare 10 different HRF modeling methods in terms of encoding and decoding score in two different datasets. Our results show that the R1-GLM model significantly outperforms competing methods in both encoding and decoding settings, positioning it as an attractive method both from the points of view of accuracy and computational efficiency. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('1402.7015v6-abstract-full').style.display = 'none'; document.getElementById('1402.7015v6-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 7 November, 2014; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 27 February, 2014; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2014. 
</p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">appears in NeuroImage (2015)</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/1310.1257">arXiv:1310.1257</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/1310.1257">pdf</a>, <a href="https://arxiv.org/format/1310.1257">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> Second order scattering descriptors predict fMRI activity due to visual textures </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Eickenberg%2C+M">Michael Eickenberg</a>, <a href="/search/cs?searchtype=author&amp;query=Pedregosa%2C+F">Fabian Pedregosa</a>, <a href="/search/cs?searchtype=author&amp;query=Mehdi%2C+S">Senoussi Mehdi</a>, <a href="/search/cs?searchtype=author&amp;query=Gramfort%2C+A">Alexandre Gramfort</a>, <a href="/search/cs?searchtype=author&amp;query=Thirion%2C+B">Bertrand Thirion</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="1310.1257v1-abstract-short" style="display: inline;"> Second layer scattering descriptors are known to provide good classification performance on natural quasi-stationary processes such as visual textures due to their sensitivity to higher order moments and continuity with respect to small deformations. 
In a functional Magnetic Resonance Imaging (fMRI) experiment we present visual textures to subjects and evaluate the predictive power of these descri&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('1310.1257v1-abstract-full').style.display = 'inline'; document.getElementById('1310.1257v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="1310.1257v1-abstract-full" style="display: none;"> Second layer scattering descriptors are known to provide good classification performance on natural quasi-stationary processes such as visual textures due to their sensitivity to higher order moments and continuity with respect to small deformations. In a functional Magnetic Resonance Imaging (fMRI) experiment we present visual textures to subjects and evaluate the predictive power of these descriptors with respect to the predictive power of simple contour energy - the first scattering layer. We are able to conclude not only that invariant second layer scattering coefficients better encode voxel activity, but also that well predicted voxels need not necessarily lie in known retinotopic regions. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('1310.1257v1-abstract-full').style.display = 'none'; document.getElementById('1310.1257v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 10 August, 2013; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2013. 
</p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">3rd International Workshop on Pattern Recognition in NeuroImaging (2013)</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/1305.2788">arXiv:1305.2788</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/1305.2788">pdf</a>, <a href="https://arxiv.org/format/1305.2788">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Applications">stat.AP</span> </div> </div> <p class="title is-5 mathjax"> HRF estimation improves sensitivity of fMRI encoding and decoding models </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Pedregosa%2C+F">Fabian Pedregosa</a>, <a href="/search/cs?searchtype=author&amp;query=Eickenberg%2C+M">Michael Eickenberg</a>, <a href="/search/cs?searchtype=author&amp;query=Thirion%2C+B">Bertrand Thirion</a>, <a href="/search/cs?searchtype=author&amp;query=Gramfort%2C+A">Alexandre Gramfort</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="1305.2788v1-abstract-short" style="display: inline;"> Extracting activation patterns from functional Magnetic Resonance Images (fMRI) datasets remains challenging in rapid-event designs due to the inherent delay of blood oxygen level-dependent (BOLD) signal. The general linear model (GLM) allows to estimate the activation from a design matrix and a fixed hemodynamic response function (HRF). 
However, the HRF is known to vary substantially between subj&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('1305.2788v1-abstract-full').style.display = 'inline'; document.getElementById('1305.2788v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="1305.2788v1-abstract-full" style="display: none;"> Extracting activation patterns from functional Magnetic Resonance Images (fMRI) datasets remains challenging in rapid-event designs due to the inherent delay of blood oxygen level-dependent (BOLD) signal. The general linear model (GLM) allows to estimate the activation from a design matrix and a fixed hemodynamic response function (HRF). However, the HRF is known to vary substantially between subjects and brain regions. In this paper, we propose a model for jointly estimating the hemodynamic response function (HRF) and the activation patterns via a low-rank representation of task effects. This model is based on the linearity assumption behind the GLM and can be computed using standard gradient-based solvers. We use the activation patterns computed by our model as input data for encoding and decoding studies and report performance improvement in both settings. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('1305.2788v1-abstract-full').style.display = 'none'; document.getElementById('1305.2788v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 13 May, 2013; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> May 2013. 
</p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">3rd International Workshop on Pattern Recognition in NeuroImaging (2013)</span> </p> </li> </ol> <div class="is-hidden-tablet"> <!-- feedback for mobile only --> <span class="help" style="display: inline-block;"><a href="https://github.com/arXiv/arxiv-search/releases">Search v0.5.6 released 2020-02-24</a>&nbsp;&nbsp;</span> </div> </div> </main> <footer> <div class="columns is-desktop" role="navigation" aria-label="Secondary"> <!-- MetaColumn 1 --> <div class="column"> <div class="columns"> <div class="column"> <ul class="nav-spaced"> <li><a href="https://info.arxiv.org/about">About</a></li> <li><a href="https://info.arxiv.org/help">Help</a></li> </ul> </div> <div class="column"> <ul class="nav-spaced"> <li> <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 512 512" class="icon filter-black" role="presentation"><title>contact arXiv</title><desc>Click here to contact arXiv</desc><path d="M502.3 190.8c3.9-3.1 9.7-.2 9.7 4.7V400c0 26.5-21.5 48-48 48H48c-26.5 0-48-21.5-48-48V195.6c0-5 5.7-7.8 9.7-4.7 22.4 17.4 52.1 39.5 154.1 113.6 21.1 15.4 56.7 47.8 92.2 47.6 35.7.3 72-32.8 92.3-47.6 102-74.1 131.6-96.3 154-113.7zM256 320c23.2.4 56.6-29.2 73.4-41.4 132.7-96.3 142.8-104.7 173.4-128.7 5.8-4.5 9.2-11.5 9.2-18.9v-19c0-26.5-21.5-48-48-48H48C21.5 64 0 85.5 0 112v19c0 7.4 3.4 14.3 9.2 18.9 30.6 23.9 40.7 32.4 173.4 128.7 16.8 12.2 50.2 41.8 73.4 41.4z"/></svg> <a href="https://info.arxiv.org/help/contact.html"> Contact</a> </li> <li> <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 512 512" class="icon filter-black" role="presentation"><title>subscribe to arXiv mailings</title><desc>Click here to subscribe</desc><path d="M476 3.2L12.5 270.6c-18.1 10.4-15.8 35.6 2.2 43.2L121 358.4l287.3-253.2c5.5-4.9 13.3 2.6 8.6 8.3L176 407v80.5c0 23.6 28.5 32.9 42.5 15.8L282 426l124.6 52.2c14.2 6 30.4-2.9 33-18.2l72-432C515 7.8 
493.3-6.8 476 3.2z"/></svg> <a href="https://info.arxiv.org/help/subscribe"> Subscribe</a> </li> </ul> </div> </div> </div> <!-- end MetaColumn 1 --> <!-- MetaColumn 2 --> <div class="column"> <div class="columns"> <div class="column"> <ul class="nav-spaced"> <li><a href="https://info.arxiv.org/help/license/index.html">Copyright</a></li> <li><a href="https://info.arxiv.org/help/policies/privacy_policy.html">Privacy Policy</a></li> </ul> </div> <div class="column sorry-app-links"> <ul class="nav-spaced"> <li><a href="https://info.arxiv.org/help/web_accessibility.html">Web Accessibility Assistance</a></li> <li> <p class="help"> <a class="a11y-main-link" href="https://status.arxiv.org" target="_blank">arXiv Operational Status <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 256 512" class="icon filter-dark_grey" role="presentation"><path d="M224.3 273l-136 136c-9.4 9.4-24.6 9.4-33.9 0l-22.6-22.6c-9.4-9.4-9.4-24.6 0-33.9l96.4-96.4-96.4-96.4c-9.4-9.4-9.4-24.6 0-33.9L54.3 103c9.4-9.4 24.6-9.4 33.9 0l136 136c9.5 9.4 9.5 24.6.1 34z"/></svg></a><br> Get status notifications via <a class="is-link" href="https://subscribe.sorryapp.com/24846f03/email/new" target="_blank"><svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 512 512" class="icon filter-black" role="presentation"><path d="M502.3 190.8c3.9-3.1 9.7-.2 9.7 4.7V400c0 26.5-21.5 48-48 48H48c-26.5 0-48-21.5-48-48V195.6c0-5 5.7-7.8 9.7-4.7 22.4 17.4 52.1 39.5 154.1 113.6 21.1 15.4 56.7 47.8 92.2 47.6 35.7.3 72-32.8 92.3-47.6 102-74.1 131.6-96.3 154-113.7zM256 320c23.2.4 56.6-29.2 73.4-41.4 132.7-96.3 142.8-104.7 173.4-128.7 5.8-4.5 9.2-11.5 9.2-18.9v-19c0-26.5-21.5-48-48-48H48C21.5 64 0 85.5 0 112v19c0 7.4 3.4 14.3 9.2 18.9 30.6 23.9 40.7 32.4 173.4 128.7 16.8 12.2 50.2 41.8 73.4 41.4z"/></svg>email</a> or <a class="is-link" href="https://subscribe.sorryapp.com/24846f03/slack/new" target="_blank"><svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 448 512" class="icon filter-black" role="presentation"><path 
d="M94.12 315.1c0 25.9-21.16 47.06-47.06 47.06S0 341 0 315.1c0-25.9 21.16-47.06 47.06-47.06h47.06v47.06zm23.72 0c0-25.9 21.16-47.06 47.06-47.06s47.06 21.16 47.06 47.06v117.84c0 25.9-21.16 47.06-47.06 47.06s-47.06-21.16-47.06-47.06V315.1zm47.06-188.98c-25.9 0-47.06-21.16-47.06-47.06S139 32 164.9 32s47.06 21.16 47.06 47.06v47.06H164.9zm0 23.72c25.9 0 47.06 21.16 47.06 47.06s-21.16 47.06-47.06 47.06H47.06C21.16 243.96 0 222.8 0 196.9s21.16-47.06 47.06-47.06H164.9zm188.98 47.06c0-25.9 21.16-47.06 47.06-47.06 25.9 0 47.06 21.16 47.06 47.06s-21.16 47.06-47.06 47.06h-47.06V196.9zm-23.72 0c0 25.9-21.16 47.06-47.06 47.06-25.9 0-47.06-21.16-47.06-47.06V79.06c0-25.9 21.16-47.06 47.06-47.06 25.9 0 47.06 21.16 47.06 47.06V196.9zM283.1 385.88c25.9 0 47.06 21.16 47.06 47.06 0 25.9-21.16 47.06-47.06 47.06-25.9 0-47.06-21.16-47.06-47.06v-47.06h47.06zm0-23.72c-25.9 0-47.06-21.16-47.06-47.06 0-25.9 21.16-47.06 47.06-47.06h117.84c25.9 0 47.06 21.16 47.06 47.06 0 25.9-21.16 47.06-47.06 47.06H283.1z"/></svg>slack</a> </p> </li> </ul> </div> </div> </div> <!-- end MetaColumn 2 --> </div> </footer> <script src="https://static.arxiv.org/static/base/1.0.0a5/js/member_acknowledgement.js"></script> </body> </html>

Pages: 1 2 3 4 5 6 7 8 9 10