CINXE.COM

Search | arXiv e-print repository

<!DOCTYPE html> <html lang="en"> <head> <meta charset="utf-8"/> <meta name="viewport" content="width=device-width, initial-scale=1"/> <!-- new favicon config and versions by realfavicongenerator.net --> <link rel="apple-touch-icon" sizes="180x180" href="https://static.arxiv.org/static/base/1.0.0a5/images/icons/apple-touch-icon.png"> <link rel="icon" type="image/png" sizes="32x32" href="https://static.arxiv.org/static/base/1.0.0a5/images/icons/favicon-32x32.png"> <link rel="icon" type="image/png" sizes="16x16" href="https://static.arxiv.org/static/base/1.0.0a5/images/icons/favicon-16x16.png"> <link rel="manifest" href="https://static.arxiv.org/static/base/1.0.0a5/images/icons/site.webmanifest"> <link rel="mask-icon" href="https://static.arxiv.org/static/base/1.0.0a5/images/icons/safari-pinned-tab.svg" color="#b31b1b"> <link rel="shortcut icon" href="https://static.arxiv.org/static/base/1.0.0a5/images/icons/favicon.ico"> <meta name="msapplication-TileColor" content="#b31b1b"> <meta name="msapplication-config" content="images/icons/browserconfig.xml"> <meta name="theme-color" content="#b31b1b"> <!-- end favicon config --> <title>Search | arXiv e-print repository</title> <script defer src="https://static.arxiv.org/static/base/1.0.0a5/fontawesome-free-5.11.2-web/js/all.js"></script> <link rel="stylesheet" href="https://static.arxiv.org/static/base/1.0.0a5/css/arxivstyle.css" /> <script type="text/x-mathjax-config"> MathJax.Hub.Config({ messageStyle: "none", extensions: ["tex2jax.js"], jax: ["input/TeX", "output/HTML-CSS"], tex2jax: { inlineMath: [ ['$','$'], ["\\(","\\)"] ], displayMath: [ ['$$','$$'], ["\\[","\\]"] ], processEscapes: true, ignoreClass: '.*', processClass: 'mathjax.*' }, TeX: { extensions: ["AMSmath.js", "AMSsymbols.js", "noErrors.js"], noErrors: { inlineDelimiters: ["$","$"], multiLine: false, style: { "font-size": "normal", "border": "" } } }, "HTML-CSS": { availableFonts: ["TeX"] } }); </script> <script 
src='//static.arxiv.org/MathJax-2.7.3/MathJax.js'></script> <script src="https://static.arxiv.org/static/base/1.0.0a5/js/notification.js"></script> <link rel="stylesheet" href="https://static.arxiv.org/static/search/0.5.6/css/bulma-tooltip.min.css" /> <link rel="stylesheet" href="https://static.arxiv.org/static/search/0.5.6/css/search.css" /> <script src="https://code.jquery.com/jquery-3.2.1.slim.min.js" integrity="sha256-k2WSCIexGzOj3Euiig+TlR8gA0EmPjuc79OEeY5L45g=" crossorigin="anonymous"></script> <script src="https://static.arxiv.org/static/search/0.5.6/js/fieldset.js"></script> <style> radio#cf-customfield_11400 { display: none; } </style> </head> <body> <header><a href="#main-container" class="is-sr-only">Skip to main content</a> <!-- contains Cornell logo and sponsor statement --> <div class="attribution level is-marginless" role="banner"> <div class="level-left"> <a class="level-item" href="https://cornell.edu/"><img src="https://static.arxiv.org/static/base/1.0.0a5/images/cornell-reduced-white-SMALL.svg" alt="Cornell University" width="200" aria-label="logo" /></a> </div> <div class="level-right is-marginless"><p class="sponsors level-item is-marginless"><span id="support-ack-url">We gratefully acknowledge support from<br /> the Simons Foundation, <a href="https://info.arxiv.org/about/ourmembers.html">member institutions</a>, and all contributors. 
<a href="https://info.arxiv.org/about/donate.html">Donate</a></span></p></div> </div> <!-- contains arXiv identity and search bar --> <div class="identity level is-marginless"> <div class="level-left"> <div class="level-item"> <a class="arxiv" href="https://arxiv.org/" aria-label="arxiv-logo"> <img src="https://static.arxiv.org/static/base/1.0.0a5/images/arxiv-logo-one-color-white.svg" aria-label="logo" alt="arxiv logo" width="85" style="width:85px;"/> </a> </div> </div> <div class="search-block level-right"> <form class="level-item mini-search" method="GET" action="https://arxiv.org/search"> <div class="field has-addons"> <div class="control"> <input class="input is-small" type="text" name="query" placeholder="Search..." aria-label="Search term or terms" /> <p class="help"><a href="https://info.arxiv.org/help">Help</a> | <a href="https://arxiv.org/search/advanced">Advanced Search</a></p> </div> <div class="control"> <div class="select is-small"> <select name="searchtype" aria-label="Field to search"> <option value="all" selected="selected">All fields</option> <option value="title">Title</option> <option value="author">Author</option> <option value="abstract">Abstract</option> <option value="comments">Comments</option> <option value="journal_ref">Journal reference</option> <option value="acm_class">ACM classification</option> <option value="msc_class">MSC classification</option> <option value="report_num">Report number</option> <option value="paper_id">arXiv identifier</option> <option value="doi">DOI</option> <option value="orcid">ORCID</option> <option value="author_id">arXiv author ID</option> <option value="help">Help pages</option> <option value="full_text">Full text</option> </select> </div> </div> <input type="hidden" name="source" value="header"> <button class="button is-small is-cul-darker">Search</button> </div> </form> </div> </div> <!-- closes identity --> <div class="container"> <div class="user-tools is-size-7 has-text-right has-text-weight-bold" 
role="navigation" aria-label="User menu"> <a href="https://arxiv.org/login">Login</a> </div> </div> </header> <main class="container" id="main-container"> <div class="level is-marginless"> <div class="level-left"> <h1 class="title is-clearfix"> Showing 1&ndash;31 of 31 results for author: <span class="mathjax">Trivedi, S</span> </h1> </div> <div class="level-right is-hidden-mobile"> <!-- feedback for mobile is moved to footer --> <span class="help" style="display: inline-block;"><a href="https://github.com/arXiv/arxiv-search/releases">Search v0.5.6 released 2020-02-24</a>&nbsp;&nbsp;</span> </div> </div> <div class="content"> <form method="GET" action="/search/cs" aria-role="search"> Searching in archive <strong>cs</strong>. <a href="/search/?searchtype=author&amp;query=Trivedi%2C+S">Search in all archives.</a> <div class="field has-addons-tablet"> <div class="control is-expanded"> <label for="query" class="hidden-label">Search term or terms</label> <input class="input is-medium" id="query" name="query" placeholder="Search term..." 
type="text" value="Trivedi, S"> </div> <div class="select control is-medium"> <label class="is-hidden" for="searchtype">Field</label> <select class="is-medium" id="searchtype" name="searchtype"><option value="all">All fields</option><option value="title">Title</option><option selected value="author">Author(s)</option><option value="abstract">Abstract</option><option value="comments">Comments</option><option value="journal_ref">Journal reference</option><option value="acm_class">ACM classification</option><option value="msc_class">MSC classification</option><option value="report_num">Report number</option><option value="paper_id">arXiv identifier</option><option value="doi">DOI</option><option value="orcid">ORCID</option><option value="license">License (URI)</option><option value="author_id">arXiv author ID</option><option value="help">Help pages</option><option value="full_text">Full text</option></select> </div> <div class="control"> <button class="button is-link is-medium">Search</button> </div> </div> <div class="field"> <div class="control is-size-7"> <label class="radio"> <input checked id="abstracts-0" name="abstracts" type="radio" value="show"> Show abstracts </label> <label class="radio"> <input id="abstracts-1" name="abstracts" type="radio" value="hide"> Hide abstracts </label> </div> </div> <div class="is-clearfix" style="height: 2.5em"> <div class="is-pulled-right"> <a href="/search/advanced?terms-0-term=Trivedi%2C+S&amp;terms-0-field=author&amp;size=50&amp;order=-announced_date_first">Advanced Search</a> </div> </div> <input type="hidden" name="order" value="-announced_date_first"> <input type="hidden" name="size" value="50"> </form> <div class="level breathe-horizontal"> <div class="level-left"> <form method="GET" action="/search/"> <div style="display: none;"> <select id="searchtype" name="searchtype"><option value="all">All fields</option><option value="title">Title</option><option selected value="author">Author(s)</option><option 
value="abstract">Abstract</option><option value="comments">Comments</option><option value="journal_ref">Journal reference</option><option value="acm_class">ACM classification</option><option value="msc_class">MSC classification</option><option value="report_num">Report number</option><option value="paper_id">arXiv identifier</option><option value="doi">DOI</option><option value="orcid">ORCID</option><option value="license">License (URI)</option><option value="author_id">arXiv author ID</option><option value="help">Help pages</option><option value="full_text">Full text</option></select> <input id="query" name="query" type="text" value="Trivedi, S"> <ul id="abstracts"><li><input checked id="abstracts-0" name="abstracts" type="radio" value="show"> <label for="abstracts-0">Show abstracts</label></li><li><input id="abstracts-1" name="abstracts" type="radio" value="hide"> <label for="abstracts-1">Hide abstracts</label></li></ul> </div> <div class="box field is-grouped is-grouped-multiline level-item"> <div class="control"> <span class="select is-small"> <select id="size" name="size"><option value="25">25</option><option selected value="50">50</option><option value="100">100</option><option value="200">200</option></select> </span> <label for="size">results per page</label>. 
</div> <div class="control"> <label for="order">Sort results by</label> <span class="select is-small"> <select id="order" name="order"><option selected value="-announced_date_first">Announcement date (newest first)</option><option value="announced_date_first">Announcement date (oldest first)</option><option value="-submitted_date">Submission date (newest first)</option><option value="submitted_date">Submission date (oldest first)</option><option value="">Relevance</option></select> </span> </div> <div class="control"> <button class="button is-small is-link">Go</button> </div> </div> </form> </div> </div> <ol class="breathe-horizontal" start="1"> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2410.10042">arXiv:2410.10042</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2410.10042">pdf</a>, <a href="https://arxiv.org/format/2410.10042">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> </div> </div> <p class="title is-5 mathjax"> LoRE: Logit-Ranked Retriever Ensemble for Enhancing Open-Domain Question Answering </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Sanniboina%2C+S">Saikrishna Sanniboina</a>, <a href="/search/cs?searchtype=author&amp;query=Trivedi%2C+S">Shiv Trivedi</a>, <a href="/search/cs?searchtype=author&amp;query=Vijayaraghavan%2C+S">Sreenidhi Vijayaraghavan</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2410.10042v1-abstract-short" style="display: inline;"> Retrieval-based question answering systems often suffer from positional bias, leading to suboptimal answer generation. 
We propose LoRE (Logit-Ranked Retriever Ensemble), a novel approach that improves answer accuracy and relevance by mitigating positional bias. LoRE employs an ensemble of diverse retrievers, such as BM25 and sentence transformers with FAISS indexing. A key innovation is a logit-ba&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.10042v1-abstract-full').style.display = 'inline'; document.getElementById('2410.10042v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2410.10042v1-abstract-full" style="display: none;"> Retrieval-based question answering systems often suffer from positional bias, leading to suboptimal answer generation. We propose LoRE (Logit-Ranked Retriever Ensemble), a novel approach that improves answer accuracy and relevance by mitigating positional bias. LoRE employs an ensemble of diverse retrievers, such as BM25 and sentence transformers with FAISS indexing. A key innovation is a logit-based answer ranking algorithm that combines the logit scores from a large language model (LLM), with the retrieval ranks of the passages. Experimental results on NarrativeQA, SQuAD demonstrate that LoRE significantly outperforms existing retrieval-based methods in terms of exact match and F1 scores. On SQuAD, LoRE achieves 14.5\%, 22.83\%, and 14.95\% improvements over the baselines for ROUGE-L, EM, and F1, respectively. Qualitatively, LoRE generates more relevant and accurate answers, especially for complex queries. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.10042v1-abstract-full').style.display = 'none'; document.getElementById('2410.10042v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 13 October, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2409.11772">arXiv:2409.11772</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2409.11772">pdf</a>, <a href="https://arxiv.org/format/2409.11772">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">stat.ML</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> Symmetry-Based Structured Matrices for Efficient Approximately Equivariant Networks </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Samudre%2C+A">Ashwin Samudre</a>, <a href="/search/cs?searchtype=author&amp;query=Petrache%2C+M">Mircea Petrache</a>, <a href="/search/cs?searchtype=author&amp;query=Nord%2C+B+D">Brian D. Nord</a>, <a href="/search/cs?searchtype=author&amp;query=Trivedi%2C+S">Shubhendu Trivedi</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2409.11772v1-abstract-short" style="display: inline;"> There has been much recent interest in designing symmetry-aware neural networks (NNs) exhibiting relaxed equivariance. 
Such NNs aim to interpolate between being exactly equivariant and being fully flexible, affording consistent performance benefits. In a separate line of work, certain structured parameter matrices -- those with displacement structure, characterized by low displacement rank (LDR) -&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2409.11772v1-abstract-full').style.display = 'inline'; document.getElementById('2409.11772v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2409.11772v1-abstract-full" style="display: none;"> There has been much recent interest in designing symmetry-aware neural networks (NNs) exhibiting relaxed equivariance. Such NNs aim to interpolate between being exactly equivariant and being fully flexible, affording consistent performance benefits. In a separate line of work, certain structured parameter matrices -- those with displacement structure, characterized by low displacement rank (LDR) -- have been used to design small-footprint NNs. Displacement structure enables fast function and gradient evaluation, but permits accurate approximations via compression primarily to classical convolutional neural networks (CNNs). In this work, we propose a general framework -- based on a novel construction of symmetry-based structured matrices -- to build approximately equivariant NNs with significantly reduced parameter counts. Our framework integrates the two aforementioned lines of work via the use of so-called Group Matrices (GMs), a forgotten precursor to the modern notion of regular representations of finite groups. GMs allow the design of structured matrices -- resembling LDR matrices -- which generalize the linear operations of a classical CNN from cyclic groups to general finite groups and their homogeneous spaces. We show that GMs can be employed to extend all the elementary operations of CNNs to general discrete groups. 
Further, the theory of structured matrices based on GMs provides a generalization of LDR theory focussed on matrices with cyclic structure, providing a tool for implementing approximate equivariance for discrete groups. We test GM-based architectures on a variety of tasks in the presence of relaxed symmetry. We report that our framework consistently performs competitively compared to approximately equivariant NNs, and other structured matrix-based compression frameworks, sometimes with a one or two orders of magnitude lower parameter count. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2409.11772v1-abstract-full').style.display = 'none'; document.getElementById('2409.11772v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 18 September, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> September 2024. 
</p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">20 pages</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2408.13242">arXiv:2408.13242</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2408.13242">pdf</a>, <a href="https://arxiv.org/format/2408.13242">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> Improving Equivariant Model Training via Constraint Relaxation </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Pertigkiozoglou%2C+S">Stefanos Pertigkiozoglou</a>, <a href="/search/cs?searchtype=author&amp;query=Chatzipantazis%2C+E">Evangelos Chatzipantazis</a>, <a href="/search/cs?searchtype=author&amp;query=Trivedi%2C+S">Shubhendu Trivedi</a>, <a href="/search/cs?searchtype=author&amp;query=Daniilidis%2C+K">Kostas Daniilidis</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2408.13242v1-abstract-short" style="display: inline;"> Equivariant neural networks have been widely used in a variety of applications due to their ability to generalize well in tasks where the underlying data symmetries are known. Despite their successes, such networks can be difficult to optimize and require careful hyperparameter tuning to train successfully. 
In this work, we propose a novel framework for improving the optimization of such models by&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2408.13242v1-abstract-full').style.display = 'inline'; document.getElementById('2408.13242v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2408.13242v1-abstract-full" style="display: none;"> Equivariant neural networks have been widely used in a variety of applications due to their ability to generalize well in tasks where the underlying data symmetries are known. Despite their successes, such networks can be difficult to optimize and require careful hyperparameter tuning to train successfully. In this work, we propose a novel framework for improving the optimization of such models by relaxing the hard equivariance constraint during training: We relax the equivariance constraint of the network&#39;s intermediate layers by introducing an additional non-equivariance term that we progressively constrain until we arrive at an equivariant solution. By controlling the magnitude of the activation of the additional relaxation term, we allow the model to optimize over a larger hypothesis space containing approximate equivariant networks and converge back to an equivariant solution at the end of training. We provide experimental results on different state-of-the-art network architectures, demonstrating how this training framework can result in equivariant models with improved generalization performance. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2408.13242v1-abstract-full').style.display = 'none'; document.getElementById('2408.13242v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 23 August, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> August 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2406.01806">arXiv:2406.01806</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2406.01806">pdf</a>, <a href="https://arxiv.org/format/2406.01806">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> </div> </div> <p class="title is-5 mathjax"> Contextualized Sequence Likelihood: Enhanced Confidence Scores for Natural Language Generation </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Lin%2C+Z">Zhen Lin</a>, <a href="/search/cs?searchtype=author&amp;query=Trivedi%2C+S">Shubhendu Trivedi</a>, <a href="/search/cs?searchtype=author&amp;query=Sun%2C+J">Jimeng Sun</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2406.01806v1-abstract-short" style="display: inline;"> The advent of large language models (LLMs) has dramatically advanced the state-of-the-art in numerous natural language generation tasks. For LLMs to be applied reliably, it is essential to have an accurate measure of their confidence. 
Currently, the most commonly used confidence score function is the likelihood of the generated sequence, which, however, conflates semantic and syntactic components.&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2406.01806v1-abstract-full').style.display = 'inline'; document.getElementById('2406.01806v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2406.01806v1-abstract-full" style="display: none;"> The advent of large language models (LLMs) has dramatically advanced the state-of-the-art in numerous natural language generation tasks. For LLMs to be applied reliably, it is essential to have an accurate measure of their confidence. Currently, the most commonly used confidence score function is the likelihood of the generated sequence, which, however, conflates semantic and syntactic components. For instance, in question-answering (QA) tasks, an awkward phrasing of the correct answer might result in a lower probability prediction. Additionally, different tokens should be weighted differently depending on the context. In this work, we propose enhancing the predicted sequence probability by assigning different weights to various tokens using attention values elicited from the base LLM. By employing a validation set, we can identify the relevant attention heads, thereby significantly improving the reliability of the vanilla sequence probability confidence measure. We refer to this new score as the Contextualized Sequence Likelihood (CSL). CSL is easy to implement, fast to compute, and offers considerable potential for further improvement with task-specific prompts. Across several QA datasets and a diverse array of LLMs, CSL has demonstrated significantly higher reliability than state-of-the-art baselines in predicting generation quality, as measured by the AUROC or AUARC. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2406.01806v1-abstract-full').style.display = 'none'; document.getElementById('2406.01806v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 3 June, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> June 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2405.19328">arXiv:2405.19328</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2405.19328">pdf</a>, <a href="https://arxiv.org/format/2405.19328">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Multiagent Systems">cs.MA</span> </div> </div> <p class="title is-5 mathjax"> Normative Modules: A Generative Agent Architecture for Learning Norms that Supports Multi-Agent Cooperation </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Sarkar%2C+A">Atrisha Sarkar</a>, <a href="/search/cs?searchtype=author&amp;query=Muresanu%2C+A+I">Andrei Ioan Muresanu</a>, <a href="/search/cs?searchtype=author&amp;query=Blair%2C+C">Carter Blair</a>, <a href="/search/cs?searchtype=author&amp;query=Sharma%2C+A">Aaryam Sharma</a>, <a href="/search/cs?searchtype=author&amp;query=Trivedi%2C+R+S">Rakshit S Trivedi</a>, <a href="/search/cs?searchtype=author&amp;query=Hadfield%2C+G+K">Gillian K Hadfield</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2405.19328v1-abstract-short" style="display: inline;"> Generative agents, which implement behaviors using a large language model (LLM) to interpret and evaluate an environment, has demonstrated the 
capacity to solve complex tasks across many social and technological domains. However, when these agents interact with other agents and humans in presence of social structures such as existing norms, fostering cooperation between them is a fundamental chall&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2405.19328v1-abstract-full').style.display = 'inline'; document.getElementById('2405.19328v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2405.19328v1-abstract-full" style="display: none;"> Generative agents, which implement behaviors using a large language model (LLM) to interpret and evaluate an environment, has demonstrated the capacity to solve complex tasks across many social and technological domains. However, when these agents interact with other agents and humans in presence of social structures such as existing norms, fostering cooperation between them is a fundamental challenge. In this paper, we develop the framework of a &#39;Normative Module&#39;: an architecture designed to enhance cooperation by enabling agents to recognize and adapt to the normative infrastructure of a given environment. We focus on the equilibrium selection aspect of the cooperation problem and inform our agent design based on the existence of classification institutions that implement correlated equilibrium to provide effective resolution of the equilibrium selection problem. Specifically, the normative module enables agents to learn through peer interactions which of multiple candidate institutions in the environment, does a group treat as authoritative. By enabling normative competence in this sense, agents gain ability to coordinate their sanctioning behaviour; coordinated sanctioning behaviour in turn shapes primary behaviour within a social environment, leading to higher average welfare. 
We design a new environment that supports institutions and evaluate the proposed framework based on two key criteria derived from agent interactions with peers and institutions: (i) the agent&#39;s ability to disregard non-authoritative institutions and (ii) the agent&#39;s ability to identify authoritative institutions among several options. We show that these capabilities allow the agent to achieve more stable cooperative outcomes compared to baseline agents without the normative module, paving the way for research in a new avenue of designing environments and agents that account for normative infrastructure. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2405.19328v1-abstract-full').style.display = 'none'; document.getElementById('2405.19328v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 29 May, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> May 2024. 
</p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2402.01629">arXiv:2402.01629</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2402.01629">pdf</a>, <a href="https://arxiv.org/ps/2402.01629">ps</a>, <a href="https://arxiv.org/format/2402.01629">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">stat.ML</span> </div> </div> <p class="title is-5 mathjax"> Position Paper: Generalized grammar rules and structure-based generalization beyond classical equivariance for lexical tasks and transduction </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Petrache%2C+M">Mircea Petrache</a>, <a href="/search/cs?searchtype=author&amp;query=Trivedi%2C+S">Shubhendu Trivedi</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2402.01629v1-abstract-short" style="display: inline;"> Compositional generalization is one of the main properties which differentiates lexical learning in humans from state-of-art neural networks. 
We propose a general framework for building models that can generalize compositionally using the concept of Generalized Grammar Rules (GGRs), a class of symmetry-based compositional constraints for transduction tasks, which we view as a transduction analogue&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2402.01629v1-abstract-full').style.display = 'inline'; document.getElementById('2402.01629v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2402.01629v1-abstract-full" style="display: none;"> Compositional generalization is one of the main properties which differentiates lexical learning in humans from state-of-art neural networks. We propose a general framework for building models that can generalize compositionally using the concept of Generalized Grammar Rules (GGRs), a class of symmetry-based compositional constraints for transduction tasks, which we view as a transduction analogue of equivariance constraints in physics-inspired tasks. Besides formalizing generalized notions of symmetry for language transduction, our framework is general enough to contain many existing works as special cases. We present ideas on how GGRs might be implemented, and in the process draw connections to reinforcement learning and other areas of research. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2402.01629v1-abstract-full').style.display = 'none'; document.getElementById('2402.01629v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 2 February, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2024. 
</p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">12 pages</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2308.12784">arXiv:2308.12784</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2308.12784">pdf</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Software Engineering">cs.SE</span> </div> </div> <p class="title is-5 mathjax"> Understanding Container-based Services under Software Aging: Dependability and Performance Views </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Bai%2C+J">Jing Bai</a>, <a href="/search/cs?searchtype=author&amp;query=Chang%2C+X">Xiaolin Chang</a>, <a href="/search/cs?searchtype=author&amp;query=Machida%2C+F">Fumio Machida</a>, <a href="/search/cs?searchtype=author&amp;query=Trivedi%2C+K+S">Kishor S. Trivedi</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2308.12784v1-abstract-short" style="display: inline;"> Container technology, as the key enabler behind microservice architectures, is widely applied in Cloud and Edge Computing. A long and continuous running of operating system (OS) hosting container-based services can encounter software aging that leads to performance deterioration and even causes system failures. 
OS rejuvenation techniques can mitigate the impact of software aging but the rejuvena&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2308.12784v1-abstract-full').style.display = 'inline'; document.getElementById('2308.12784v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2308.12784v1-abstract-full" style="display: none;"> Container technology, as the key enabler behind microservice architectures, is widely applied in Cloud and Edge Computing. A long and continuous running of operating system (OS) hosting container-based services can encounter software aging that leads to performance deterioration and even causes system failures. OS rejuvenation techniques can mitigate the impact of software aging but the rejuvenation trigger interval needs to be carefully determined to reduce the downtime cost due to rejuvenation. This paper proposes a comprehensive semi-Markov-based approach to quantitatively evaluate the effect of OS rejuvenation on the dependability and the performance of a container-based service. In contrast to the existing studies, we neither restrict the distributions of time intervals of events to be exponential nor assume that backup resources are always available. Through the numerical study, we show the optimal container-migration trigger intervals that can maximize the dependability or minimize the performance of a container-based service. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2308.12784v1-abstract-full').style.display = 'none'; document.getElementById('2308.12784v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 24 August, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> August 2023. 
</p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2308.12748">arXiv:2308.12748</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2308.12748">pdf</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Software Engineering">cs.SE</span> </div> </div> <p class="title is-5 mathjax"> Towards Semi-Markov Model-based Dependability Evaluation of VM-based Multi-Domain Service Function Chain </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Liu%2C+L">Lina Liu</a>, <a href="/search/cs?searchtype=author&amp;query=Bai%2C+J">Jing Bai</a>, <a href="/search/cs?searchtype=author&amp;query=Chang%2C+X">Xiaolin Chang</a>, <a href="/search/cs?searchtype=author&amp;query=Machida%2C+F">Fumio Machida</a>, <a href="/search/cs?searchtype=author&amp;query=Trivedi%2C+K+S">Kishor S. Trivedi</a>, <a href="/search/cs?searchtype=author&amp;query=Zhu%2C+H">Haoran Zhu</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2308.12748v1-abstract-short" style="display: inline;"> In NFV networks, service functions (SFs) can be deployed on virtual machines (VMs) across multiple domains and then form a service function chain (MSFC) for end-to-end network service provision. However, any software component in a VM-based MSFC must experience software aging issue after a long period of operation. 
This paper quantitatively investigates the capability of proactive rejuvenation tec&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2308.12748v1-abstract-full').style.display = 'inline'; document.getElementById('2308.12748v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2308.12748v1-abstract-full" style="display: none;"> In NFV networks, service functions (SFs) can be deployed on virtual machines (VMs) across multiple domains and then form a service function chain (MSFC) for end-to-end network service provision. However, any software component in a VM-based MSFC must experience software aging issue after a long period of operation. This paper quantitatively investigates the capability of proactive rejuvenation techniques in reducing the damage of software aging on a VM-based MSFC. We develop a semi-Markov model to capture the behaviors of SFs, VMs and virtual machine monitors (VMMs) from software aging to recovery under the condition that failure times and recovery times follow general distributions. We derive the formulas for calculating the steady-state availability and reliability of the VM-based MSFC composed of multiple SFs running on VMs hosted by VMMs. Sensitivity analysis is also conducted to identify potential dependability bottlenecks. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2308.12748v1-abstract-full').style.display = 'none'; document.getElementById('2308.12748v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 24 August, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> August 2023. 
</p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2305.19187">arXiv:2305.19187</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2305.19187">pdf</a>, <a href="https://arxiv.org/format/2305.19187">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">stat.ML</span> </div> </div> <p class="title is-5 mathjax"> Generating with Confidence: Uncertainty Quantification for Black-box Large Language Models </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Lin%2C+Z">Zhen Lin</a>, <a href="/search/cs?searchtype=author&amp;query=Trivedi%2C+S">Shubhendu Trivedi</a>, <a href="/search/cs?searchtype=author&amp;query=Sun%2C+J">Jimeng Sun</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2305.19187v3-abstract-short" style="display: inline;"> Large language models (LLMs) specializing in natural language generation (NLG) have recently started exhibiting promising capabilities across a variety of domains. However, gauging the trustworthiness of responses generated by LLMs remains an open challenge, with limited research on uncertainty quantification (UQ) for NLG. 
Furthermore, existing literature typically assumes white-box access to lang&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2305.19187v3-abstract-full').style.display = 'inline'; document.getElementById('2305.19187v3-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2305.19187v3-abstract-full" style="display: none;"> Large language models (LLMs) specializing in natural language generation (NLG) have recently started exhibiting promising capabilities across a variety of domains. However, gauging the trustworthiness of responses generated by LLMs remains an open challenge, with limited research on uncertainty quantification (UQ) for NLG. Furthermore, existing literature typically assumes white-box access to language models, which is becoming unrealistic either due to the closed-source nature of the latest LLMs or computational constraints. In this work, we investigate UQ in NLG for *black-box* LLMs. We first differentiate *uncertainty* vs *confidence*: the former refers to the ``dispersion&#39;&#39; of the potential predictions for a fixed input, and the latter refers to the confidence on a particular prediction/generation. We then propose and compare several confidence/uncertainty measures, applying them to *selective NLG* where unreliable results could either be ignored or yielded for further assessment. Experiments were carried out with several popular LLMs on question-answering datasets (for evaluation purposes). Results reveal that a simple measure for the semantic dispersion can be a reliable predictor of the quality of LLM responses, providing valuable insights for practitioners on uncertainty management when adopting LLMs. The code to replicate our experiments is available at https://github.com/zlin7/UQ-NLG. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2305.19187v3-abstract-full').style.display = 'none'; document.getElementById('2305.19187v3-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 19 May, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 30 May, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> May 2023. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Published in Transactions on Machine Learning Research (05/2024)</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2305.17592">arXiv:2305.17592</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2305.17592">pdf</a>, <a href="https://arxiv.org/format/2305.17592">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">stat.ML</span> </div> </div> <p class="title is-5 mathjax"> Approximation-Generalization Trade-offs under (Approximate) Group Equivariance </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Petrache%2C+M">Mircea Petrache</a>, <a href="/search/cs?searchtype=author&amp;query=Trivedi%2C+S">Shubhendu Trivedi</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2305.17592v1-abstract-short" style="display: inline;"> The explicit incorporation of task-specific inductive biases through symmetry has emerged as a 
general design precept in the development of high-performance machine learning models. For example, group equivariant neural networks have demonstrated impressive performance across various domains and applications such as protein and drug design. A prevalent intuition about such models is that the integ&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2305.17592v1-abstract-full').style.display = 'inline'; document.getElementById('2305.17592v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2305.17592v1-abstract-full" style="display: none;"> The explicit incorporation of task-specific inductive biases through symmetry has emerged as a general design precept in the development of high-performance machine learning models. For example, group equivariant neural networks have demonstrated impressive performance across various domains and applications such as protein and drug design. A prevalent intuition about such models is that the integration of relevant symmetry results in enhanced generalization. Moreover, it is posited that when the data and/or the model may only exhibit $\textit{approximate}$ or $\textit{partial}$ symmetry, the optimal or best-performing model is one where the model symmetry aligns with the data symmetry. In this paper, we conduct a formal unified investigation of these intuitions. To begin, we present general quantitative bounds that demonstrate how models capturing task-specific symmetries lead to improved generalization. In fact, our results do not require the transformations to be finite or even form a group and can work with partial or approximate equivariance. Utilizing this quantification, we examine the more general question of model mis-specification i.e. when the model symmetries don&#39;t align with the data symmetries. 
We establish, for a given symmetry group, a quantitative comparison between the approximate/partial equivariance of the model and that of the data distribution, precisely connecting model equivariance error and data equivariance error. Our result delineates conditions under which the model equivariance error is optimal, thereby yielding the best-performing model for the given task and data. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2305.17592v1-abstract-full').style.display = 'none'; document.getElementById('2305.17592v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 27 May, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> May 2023. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2302.00839">arXiv:2302.00839</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2302.00839">pdf</a>, <a href="https://arxiv.org/format/2302.00839">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Methodology">stat.ME</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">stat.ML</span> </div> </div> <p class="title is-5 mathjax"> Fast Online Value-Maximizing Prediction Sets with Conformal Cost Control </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Lin%2C+Z">Zhen Lin</a>, <a href="/search/cs?searchtype=author&amp;query=Trivedi%2C+S">Shubhendu Trivedi</a>, <a href="/search/cs?searchtype=author&amp;query=Xiao%2C+C">Cao Xiao</a>, <a href="/search/cs?searchtype=author&amp;query=Sun%2C+J">Jimeng Sun</a> </p> <p 
class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2302.00839v3-abstract-short" style="display: inline;"> Many real-world multi-label prediction problems involve set-valued predictions that must satisfy specific requirements dictated by downstream usage. We focus on a typical scenario where such requirements, separately encoding $\textit{value}$ and $\textit{cost}$, compete with each other. For instance, a hospital might expect a smart diagnosis system to capture as many severe, often co-morbid, disea&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2302.00839v3-abstract-full').style.display = 'inline'; document.getElementById('2302.00839v3-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2302.00839v3-abstract-full" style="display: none;"> Many real-world multi-label prediction problems involve set-valued predictions that must satisfy specific requirements dictated by downstream usage. We focus on a typical scenario where such requirements, separately encoding $\textit{value}$ and $\textit{cost}$, compete with each other. For instance, a hospital might expect a smart diagnosis system to capture as many severe, often co-morbid, diseases as possible (the value), while maintaining strict control over incorrect predictions (the cost). We present a general pipeline, dubbed as FavMac, to maximize the value while controlling the cost in such scenarios. FavMac can be combined with almost any multi-label classifier, affording distribution-free theoretical guarantees on cost control. Moreover, unlike prior works, it can handle real-world large-scale applications via a carefully designed online update mechanism, which is of independent interest. 
Our methodological and theoretical contributions are supported by experiments on several healthcare tasks and synthetic datasets - FavMac furnishes higher value compared with several variants and baselines while maintaining strict cost control. Our code is available at https://github.com/zlin7/FavMac <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2302.00839v3-abstract-full').style.display = 'none'; document.getElementById('2302.00839v3-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 25 April, 2023; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 1 February, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2023. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Proceedings of the 40th International Conference on Machine Learning, Honolulu, Hawaii, USA. PMLR 202, 2023. 
11 pages (main paper, including references) + 10 pages (supplementary material)</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2207.03529">arXiv:2207.03529</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2207.03529">pdf</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Signal Processing">eess.SP</span> </div> </div> <p class="title is-5 mathjax"> A Novel IoT-based Framework for Non-Invasive Human Hygiene Monitoring using Machine Learning Techniques </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Faruk%2C+M+J+H">Md Jobair Hossain Faruk</a>, <a href="/search/cs?searchtype=author&amp;query=Trivedi%2C+S">Shashank Trivedi</a>, <a href="/search/cs?searchtype=author&amp;query=Masum%2C+M">Mohammad Masum</a>, <a href="/search/cs?searchtype=author&amp;query=Valero%2C+M">Maria Valero</a>, <a href="/search/cs?searchtype=author&amp;query=Shahriar%2C+H">Hossain Shahriar</a>, <a href="/search/cs?searchtype=author&amp;query=Ahamed%2C+S+I">Sheikh Iqbal Ahamed</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2207.03529v1-abstract-short" style="display: inline;"> People&#39;s personal hygiene habits speak volumes about the condition of taking care of their bodies and health in daily lifestyle. Maintaining good hygiene practices not only reduces the chances of contracting a disease but could also reduce the risk of spreading illness within the community. 
Given the current pandemic, daily habits such as washing hands or taking regular showers have taken primary&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2207.03529v1-abstract-full').style.display = 'inline'; document.getElementById('2207.03529v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2207.03529v1-abstract-full" style="display: none;"> People&#39;s personal hygiene habits speak volumes about the condition of taking care of their bodies and health in daily lifestyle. Maintaining good hygiene practices not only reduces the chances of contracting a disease but could also reduce the risk of spreading illness within the community. Given the current pandemic, daily habits such as washing hands or taking regular showers have taken primary importance among people, especially for the elderly population living alone at home or in an assisted living facility. This paper presents a novel and non-invasive framework for monitoring human hygiene using vibration sensors where we adopt Machine Learning techniques. The approach is based on a combination of a geophone sensor, a digitizer, and a cost-efficient computer board in a practical enclosure. Monitoring daily hygiene routines may help healthcare professionals be proactive rather than reactive in identifying and controlling the spread of potential outbreaks within the community. The experimental result indicates that applying a Support Vector Machine (SVM) for binary classification exhibits a promising accuracy of ~95% in the classification of different hygiene habits. Furthermore, both tree-based classifier (Random Forrest and Decision Tree) outperforms other models by achieving the highest accuracy (100%), which means that classifying hygiene events using vibration and non-invasive sensors is possible for monitoring hygiene activity. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2207.03529v1-abstract-full').style.display = 'none'; document.getElementById('2207.03529v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 7 July, 2022; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> July 2022. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Journal ref:</span> ICHI 2022 : 10th IEEE International Conference on Healthcare Informatics </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2205.12940">arXiv:2205.12940</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2205.12940">pdf</a>, <a href="https://arxiv.org/format/2205.12940">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">stat.ML</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Methodology">stat.ME</span> </div> </div> <p class="title is-5 mathjax"> Conformal Prediction Intervals with Temporal Dependence </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Lin%2C+Z">Zhen Lin</a>, <a href="/search/cs?searchtype=author&amp;query=Trivedi%2C+S">Shubhendu Trivedi</a>, <a href="/search/cs?searchtype=author&amp;query=Sun%2C+J">Jimeng Sun</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2205.12940v3-abstract-short" style="display: inline;"> Cross-sectional prediction is common in many domains such as healthcare, including 
forecasting tasks using electronic health records, where different patients form a cross-section. We focus on the task of constructing valid prediction intervals (PIs) in time series regression with a cross-section. A prediction interval is considered valid if it covers the true response with (a pre-specified) high&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2205.12940v3-abstract-full').style.display = 'inline'; document.getElementById('2205.12940v3-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2205.12940v3-abstract-full" style="display: none;"> Cross-sectional prediction is common in many domains such as healthcare, including forecasting tasks using electronic health records, where different patients form a cross-section. We focus on the task of constructing valid prediction intervals (PIs) in time series regression with a cross-section. A prediction interval is considered valid if it covers the true response with (a pre-specified) high probability. We first distinguish between two notions of validity in such a setting: cross-sectional and longitudinal. Cross-sectional validity is concerned with validity across the cross-section of the time series data, while longitudinal validity accounts for the temporal dimension. Coverage guarantees along both these dimensions are ideally desirable; however, we show that distribution-free longitudinal validity is theoretically impossible. Despite this limitation, we propose Conformal Prediction with Temporal Dependence (CPTD), a procedure that is able to maintain strict cross-sectional validity while improving longitudinal coverage. CPTD is post-hoc and light-weight, and can easily be used in conjunction with any prediction model as long as a calibration set is available. 
We focus on neural networks due to their ability to model complicated data such as diagnosis codes for time series regression, and perform extensive experimental validation to verify the efficacy of our approach. We find that CPTD outperforms baselines on a variety of datasets by improving longitudinal coverage and often providing more efficient (narrower) PIs. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2205.12940v3-abstract-full').style.display = 'none'; document.getElementById('2205.12940v3-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 2 October, 2022; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 25 May, 2022; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> May 2022. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">16 pages (main paper, including references) + 6 pages (supplementary material). Transactions of Machine Learning Research (September 2022). 
Code is available at https://github.com/zlin7/CPTD</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2205.09940">arXiv:2205.09940</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2205.09940">pdf</a>, <a href="https://arxiv.org/format/2205.09940">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">stat.ML</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Methodology">stat.ME</span> </div> </div> <p class="title is-5 mathjax"> Conformal Prediction with Temporal Quantile Adjustments </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Lin%2C+Z">Zhen Lin</a>, <a href="/search/cs?searchtype=author&amp;query=Trivedi%2C+S">Shubhendu Trivedi</a>, <a href="/search/cs?searchtype=author&amp;query=Sun%2C+J">Jimeng Sun</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2205.09940v2-abstract-short" style="display: inline;"> We develop Temporal Quantile Adjustment (TQA), a general method to construct efficient and valid prediction intervals (PIs) for regression on cross-sectional time series data. Such data is common in many domains, including econometrics and healthcare. 
A canonical example in healthcare is predicting patient outcomes using physiological time-series data, where a population of patients composes a cro&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2205.09940v2-abstract-full').style.display = 'inline'; document.getElementById('2205.09940v2-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2205.09940v2-abstract-full" style="display: none;"> We develop Temporal Quantile Adjustment (TQA), a general method to construct efficient and valid prediction intervals (PIs) for regression on cross-sectional time series data. Such data is common in many domains, including econometrics and healthcare. A canonical example in healthcare is predicting patient outcomes using physiological time-series data, where a population of patients composes a cross-section. Reliable PI estimators in this setting must address two distinct notions of coverage: cross-sectional coverage across a cross-sectional slice, and longitudinal coverage along the temporal dimension for each time series. Recent works have explored adapting Conformal Prediction (CP) to obtain PIs in the time series context. However, none handles both notions of coverage simultaneously. CP methods typically query a pre-specified quantile from the distribution of nonconformity scores on a calibration set. TQA adjusts the quantile to query in CP at each time $t$, accounting for both cross-sectional and longitudinal coverage in a theoretically-grounded manner. The post-hoc nature of TQA facilitates its use as a general wrapper around any time series regression model. We validate TQA&#39;s performance through extensive experimentation: TQA generally obtains efficient PIs and improves longitudinal coverage while preserving cross-sectional coverage. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2205.09940v2-abstract-full').style.display = 'none'; document.getElementById('2205.09940v2-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 23 May, 2022; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 19 May, 2022; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> May 2022. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">12 pages (main paper, including references) + 11 pages (supplementary material)</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2202.07679">arXiv:2202.07679</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2202.07679">pdf</a>, <a href="https://arxiv.org/format/2202.07679">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">stat.ML</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Methodology">stat.ME</span> </div> </div> <p class="title is-5 mathjax"> Taking a Step Back with KCal: Multi-Class Kernel-Based Calibration for Deep Neural Networks </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Lin%2C+Z">Zhen Lin</a>, <a href="/search/cs?searchtype=author&amp;query=Trivedi%2C+S">Shubhendu Trivedi</a>, <a href="/search/cs?searchtype=author&amp;query=Sun%2C+J">Jimeng Sun</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span 
class="abstract-short has-text-grey-dark mathjax" id="2202.07679v3-abstract-short" style="display: inline;"> Deep neural network (DNN) classifiers are often overconfident, producing miscalibrated class probabilities. In high-risk applications like healthcare, practitioners require $\textit{fully calibrated}$ probability predictions for decision-making. That is, conditioned on the prediction $\textit{vector}$, $\textit{every}$ class&#39; probability should be close to the predicted value. Most existing calibr&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2202.07679v3-abstract-full').style.display = 'inline'; document.getElementById('2202.07679v3-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2202.07679v3-abstract-full" style="display: none;"> Deep neural network (DNN) classifiers are often overconfident, producing miscalibrated class probabilities. In high-risk applications like healthcare, practitioners require $\textit{fully calibrated}$ probability predictions for decision-making. That is, conditioned on the prediction $\textit{vector}$, $\textit{every}$ class&#39; probability should be close to the predicted value. Most existing calibration methods either lack theoretical guarantees for producing calibrated outputs, reduce classification accuracy in the process, or only calibrate the predicted class. This paper proposes a new Kernel-based calibration method called KCal. Unlike existing calibration procedures, KCal does not operate directly on the logits or softmax outputs of the DNN. Instead, KCal learns a metric space on the penultimate-layer latent embedding and generates predictions using kernel density estimates on a calibration set. We first analyze KCal theoretically, showing that it enjoys a provable $\textit{full}$ calibration guarantee. 
Then, through extensive experiments across a variety of datasets, we show that KCal consistently outperforms baselines as measured by the calibration error and by proper scoring rules like the Brier Score. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2202.07679v3-abstract-full').style.display = 'none'; document.getElementById('2202.07679v3-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 8 December, 2022; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 15 February, 2022; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2022. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2110.07472">arXiv:2110.07472</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2110.07472">pdf</a>, <a href="https://arxiv.org/format/2110.07472">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">stat.ML</span> </div> </div> <p class="title is-5 mathjax"> Capacity of Group-invariant Linear Readouts from Equivariant Representations: How Many Objects can be Linearly Classified Under All Possible Views? 
</p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Farrell%2C+M">Matthew Farrell</a>, <a href="/search/cs?searchtype=author&amp;query=Bordelon%2C+B">Blake Bordelon</a>, <a href="/search/cs?searchtype=author&amp;query=Trivedi%2C+S">Shubhendu Trivedi</a>, <a href="/search/cs?searchtype=author&amp;query=Pehlevan%2C+C">Cengiz Pehlevan</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2110.07472v4-abstract-short" style="display: inline;"> Equivariance has emerged as a desirable property of representations of objects subject to identity-preserving transformations that constitute a group, such as translations and rotations. However, the expressivity of a representation constrained by group equivariance is still not fully understood. We address this gap by providing a generalization of Cover&#39;s Function Counting Theorem that quantifies&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2110.07472v4-abstract-full').style.display = 'inline'; document.getElementById('2110.07472v4-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2110.07472v4-abstract-full" style="display: none;"> Equivariance has emerged as a desirable property of representations of objects subject to identity-preserving transformations that constitute a group, such as translations and rotations. However, the expressivity of a representation constrained by group equivariance is still not fully understood. We address this gap by providing a generalization of Cover&#39;s Function Counting Theorem that quantifies the number of linearly separable and group-invariant binary dichotomies that can be assigned to equivariant representations of objects. 
We find that the fraction of separable dichotomies is determined by the dimension of the space that is fixed by the group action. We show how this relation extends to operations such as convolutions, element-wise nonlinearities, and global and local pooling. While other operations do not change the fraction of separable dichotomies, local pooling decreases the fraction, despite being a highly nonlinear operation. Finally, we test our theory on intermediate representations of randomly initialized and fully trained convolutional neural networks and find perfect agreement. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2110.07472v4-abstract-full').style.display = 'none'; document.getElementById('2110.07472v4-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 5 February, 2022; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 14 October, 2021; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2021. 
</p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Version accepted to ICLR 2022</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2106.00225">arXiv:2106.00225</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2106.00225">pdf</a>, <a href="https://arxiv.org/format/2106.00225">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Methodology">stat.ME</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">stat.ML</span> </div> </div> <p class="title is-5 mathjax"> Locally Valid and Discriminative Prediction Intervals for Deep Learning Models </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Lin%2C+Z">Zhen Lin</a>, <a href="/search/cs?searchtype=author&amp;query=Trivedi%2C+S">Shubhendu Trivedi</a>, <a href="/search/cs?searchtype=author&amp;query=Sun%2C+J">Jimeng Sun</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2106.00225v4-abstract-short" style="display: inline;"> Crucial for building trust in deep learning models for critical real-world applications is efficient and theoretically sound uncertainty quantification, a task that continues to be challenging. Useful uncertainty information is expected to have two key properties: It should be valid (guaranteeing coverage) and discriminative (more uncertain when the expected risk is high). 
Moreover, when combined&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2106.00225v4-abstract-full').style.display = 'inline'; document.getElementById('2106.00225v4-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2106.00225v4-abstract-full" style="display: none;"> Crucial for building trust in deep learning models for critical real-world applications is efficient and theoretically sound uncertainty quantification, a task that continues to be challenging. Useful uncertainty information is expected to have two key properties: It should be valid (guaranteeing coverage) and discriminative (more uncertain when the expected risk is high). Moreover, when combined with deep learning (DL) methods, it should be scalable and affect the DL model performance minimally. Most existing Bayesian methods lack frequentist coverage guarantees and usually affect model performance. The few available frequentist methods are rarely discriminative and/or violate coverage guarantees due to unrealistic assumptions. Moreover, many methods are expensive or require substantial modifications to the base neural network. Building upon recent advances in conformal prediction [13, 33] and leveraging the classical idea of kernel regression, we propose Locally Valid and Discriminative prediction intervals (LVD), a simple, efficient, and lightweight method to construct discriminative prediction intervals (PIs) for almost any DL model. With no assumptions on the data distribution, such PIs also offer finite-sample local coverage guarantees (contrasted to the simpler marginal coverage). 
We empirically verify, using diverse datasets, that besides being the only locally valid method for DL, LVD also exceeds or matches the performance (including coverage rate and prediction accuracy) of existing uncertainty quantification methods, while offering additional benefits in scalability and flexibility. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2106.00225v4-abstract-full').style.display = 'none'; document.getElementById('2106.00225v4-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 26 October, 2021; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 1 June, 2021; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> June 2021. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Advances in Neural Information Processing Systems 34 (NeurIPS 2021). 
Code is available at https://github.com/zlin7/LVD</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2102.13123">arXiv:2102.13123</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2102.13123">pdf</a>, <a href="https://arxiv.org/format/2102.13123">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Cosmology and Nongalactic Astrophysics">astro-ph.CO</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> <div class="is-inline-block" style="margin-left: 0.5rem"> <div class="tags has-addons"> <span class="tag is-dark is-size-7">doi</span> <span class="tag is-light is-size-7"><a class="" href="https://doi.org/10.1093/mnras/stab2229">10.1093/mnras/stab2229 <i class="fa fa-external-link" aria-hidden="true"></i></a></span> </div> </div> </div> <p class="title is-5 mathjax"> DeepSZ: Identification of Sunyaev-Zel&#39;dovich Galaxy Clusters using Deep Learning </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Lin%2C+Z">Zhen Lin</a>, <a href="/search/cs?searchtype=author&amp;query=Huang%2C+N">Nicholas Huang</a>, <a href="/search/cs?searchtype=author&amp;query=Avestruz%2C+C">Camille Avestruz</a>, <a href="/search/cs?searchtype=author&amp;query=Wu%2C+W+L+K">W. L. 
Kimmy Wu</a>, <a href="/search/cs?searchtype=author&amp;query=Trivedi%2C+S">Shubhendu Trivedi</a>, <a href="/search/cs?searchtype=author&amp;query=Caldeira%2C+J">João Caldeira</a>, <a href="/search/cs?searchtype=author&amp;query=Nord%2C+B">Brian Nord</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2102.13123v2-abstract-short" style="display: inline;"> Galaxy clusters identified from the Sunyaev Zel&#39;dovich (SZ) effect are a key ingredient in multi-wavelength cluster-based cosmology. We present a comparison between two methods of cluster identification: the standard Matched Filter (MF) method in SZ cluster finding and a method using Convolutional Neural Networks (CNN). We further implement and show results for a `combined&#39; identifier. We apply th&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2102.13123v2-abstract-full').style.display = 'inline'; document.getElementById('2102.13123v2-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2102.13123v2-abstract-full" style="display: none;"> Galaxy clusters identified from the Sunyaev Zel&#39;dovich (SZ) effect are a key ingredient in multi-wavelength cluster-based cosmology. We present a comparison between two methods of cluster identification: the standard Matched Filter (MF) method in SZ cluster finding and a method using Convolutional Neural Networks (CNN). We further implement and show results for a `combined&#39; identifier. We apply the methods to simulated millimeter maps for several observing frequencies for an SPT-3G-like survey. There are some key differences between the methods. The MF method requires image pre-processing to remove point sources and a model for the noise, while the CNN method requires very little pre-processing of images. 
Additionally, the CNN requires tuning of hyperparameters in the model and takes as input, cutout images of the sky. Specifically, we use the CNN to classify whether or not an 8 arcmin $\times$ 8 arcmin cutout of the sky contains a cluster. We compare differences in purity and completeness. The MF signal-to-noise ratio depends on both mass and redshift. Our CNN, trained for a given mass threshold, captures a different set of clusters than the MF, some of which have SNR below the MF detection threshold. However, the CNN tends to mis-classify cutouts whose clusters are located near the edge of the cutout, which can be mitigated with staggered cutouts. We leverage the complementarity of the two methods, combining the scores from each method for identification. The purity and completeness of the MF alone are both 0.61, assuming a standard detection threshold. The purity and completeness of the CNN alone are 0.59 and 0.61. The combined classification method yields 0.60 and 0.77, a significant increase for completeness with a modest decrease in purity. We advocate for combined methods that increase the confidence of many lower signal-to-noise clusters. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2102.13123v2-abstract-full').style.display = 'none'; document.getElementById('2102.13123v2-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 8 March, 2021; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 25 February, 2021; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2021. 
</p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Report number:</span> FERMILAB-PUB-21-077-SCD </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2012.04474">arXiv:2012.04474</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2012.04474">pdf</a>, <a href="https://arxiv.org/format/2012.04474">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> Rotation-Invariant Autoencoders for Signals on Spheres </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Lohit%2C+S">Suhas Lohit</a>, <a href="/search/cs?searchtype=author&amp;query=Trivedi%2C+S">Shubhendu Trivedi</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2012.04474v1-abstract-short" style="display: inline;"> Omnidirectional images and spherical representations of $3D$ shapes cannot be processed with conventional 2D convolutional neural networks (CNNs) as the unwrapping leads to large distortion. Using fast implementations of spherical and $SO(3)$ convolutions, researchers have recently developed deep learning methods better suited for classifying spherical images. 
These newly proposed convolutional la&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2012.04474v1-abstract-full').style.display = 'inline'; document.getElementById('2012.04474v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2012.04474v1-abstract-full" style="display: none;"> Omnidirectional images and spherical representations of $3D$ shapes cannot be processed with conventional 2D convolutional neural networks (CNNs) as the unwrapping leads to large distortion. Using fast implementations of spherical and $SO(3)$ convolutions, researchers have recently developed deep learning methods better suited for classifying spherical images. These newly proposed convolutional layers naturally extend the notion of convolution to functions on the unit sphere $S^2$ and the group of rotations $SO(3)$ and these layers are equivariant to 3D rotations. In this paper, we consider the problem of unsupervised learning of rotation-invariant representations for spherical images. In particular, we carefully design an autoencoder architecture consisting of $S^2$ and $SO(3)$ convolutional layers. As 3D rotations are often a nuisance factor, the latent space is constrained to be exactly invariant to these input transformations. As the rotation information is discarded in the latent space, we craft a novel rotation-invariant loss function for training the network. Extensive experiments on multiple datasets demonstrate the usefulness of the learned representations on clustering, retrieval and classification applications. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2012.04474v1-abstract-full').style.display = 'none'; document.getElementById('2012.04474v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 8 December, 2020; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> December 2020. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2006.03550">arXiv:2006.03550</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2006.03550">pdf</a>, <a href="https://arxiv.org/ps/2006.03550">ps</a>, <a href="https://arxiv.org/format/2006.03550">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">stat.ML</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> The Expected Jacobian Outerproduct: Theory and Empirics </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Trivedi%2C+S">Shubhendu Trivedi</a>, <a href="/search/cs?searchtype=author&amp;query=Wang%2C+J">J. Wang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2006.03550v1-abstract-short" style="display: inline;"> The expected gradient outerproduct (EGOP) of an unknown regression function is an operator that arises in the theory of multi-index regression, and is known to recover those directions that are most relevant to predicting the output. However, work on the EGOP, including that on its cheap estimators, is restricted to the regression setting. 
In this work, we adapt this operator to the multi-class se&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2006.03550v1-abstract-full').style.display = 'inline'; document.getElementById('2006.03550v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2006.03550v1-abstract-full" style="display: none;"> The expected gradient outerproduct (EGOP) of an unknown regression function is an operator that arises in the theory of multi-index regression, and is known to recover those directions that are most relevant to predicting the output. However, work on the EGOP, including that on its cheap estimators, is restricted to the regression setting. In this work, we adapt this operator to the multi-class setting, which we dub the expected Jacobian outerproduct (EJOP). Moreover, we propose a simple rough estimator of the EJOP and show that somewhat surprisingly, it remains statistically consistent under mild assumptions. Furthermore, we show that the eigenvalues and eigenspaces also remain consistent. Finally, we show that the estimated EJOP can be used as a metric to yield improvements in real-world non-parametric classification tasks: both by its use as a metric, and also as cheap initialization in metric learning tasks. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2006.03550v1-abstract-full').style.display = 'none'; document.getElementById('2006.03550v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 5 June, 2020; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> June 2020. 
</p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Technical Report</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/1911.05796">arXiv:1911.05796</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/1911.05796">pdf</a>, <a href="https://arxiv.org/ps/1911.05796">ps</a>, <a href="https://arxiv.org/format/1911.05796">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Instrumentation and Methods for Astrophysics">astro-ph.IM</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Physics and Society">physics.soc-ph</span> </div> </div> <p class="title is-5 mathjax"> Response to NITRD, NCO, NSF Request for Information on &#34;Update to the 2016 National Artificial Intelligence Research and Development Strategic Plan&#34; </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Amundson%2C+J">J. Amundson</a>, <a href="/search/cs?searchtype=author&amp;query=Annis%2C+J">J. Annis</a>, <a href="/search/cs?searchtype=author&amp;query=Avestruz%2C+C">C. Avestruz</a>, <a href="/search/cs?searchtype=author&amp;query=Bowring%2C+D">D. Bowring</a>, <a href="/search/cs?searchtype=author&amp;query=Caldeira%2C+J">J. Caldeira</a>, <a href="/search/cs?searchtype=author&amp;query=Cerati%2C+G">G. Cerati</a>, <a href="/search/cs?searchtype=author&amp;query=Chang%2C+C">C. Chang</a>, <a href="/search/cs?searchtype=author&amp;query=Dodelson%2C+S">S. Dodelson</a>, <a href="/search/cs?searchtype=author&amp;query=Elvira%2C+D">D. Elvira</a>, <a href="/search/cs?searchtype=author&amp;query=Farahi%2C+A">A. 
Farahi</a>, <a href="/search/cs?searchtype=author&amp;query=Genser%2C+K">K. Genser</a>, <a href="/search/cs?searchtype=author&amp;query=Gray%2C+L">L. Gray</a>, <a href="/search/cs?searchtype=author&amp;query=Gutsche%2C+O">O. Gutsche</a>, <a href="/search/cs?searchtype=author&amp;query=Harris%2C+P">P. Harris</a>, <a href="/search/cs?searchtype=author&amp;query=Kinney%2C+J">J. Kinney</a>, <a href="/search/cs?searchtype=author&amp;query=Kowalkowski%2C+J+B">J. B. Kowalkowski</a>, <a href="/search/cs?searchtype=author&amp;query=Kutschke%2C+R">R. Kutschke</a>, <a href="/search/cs?searchtype=author&amp;query=Mrenna%2C+S">S. Mrenna</a>, <a href="/search/cs?searchtype=author&amp;query=Nord%2C+B">B. Nord</a>, <a href="/search/cs?searchtype=author&amp;query=Para%2C+A">A. Para</a>, <a href="/search/cs?searchtype=author&amp;query=Pedro%2C+K">K. Pedro</a>, <a href="/search/cs?searchtype=author&amp;query=Perdue%2C+G+N">G. N. Perdue</a>, <a href="/search/cs?searchtype=author&amp;query=Scheinker%2C+A">A. Scheinker</a>, <a href="/search/cs?searchtype=author&amp;query=Spentzouris%2C+P">P. Spentzouris</a>, <a href="/search/cs?searchtype=author&amp;query=John%2C+J+S">J. St. John</a> , et al. 
(5 additional authors not shown) </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="1911.05796v1-abstract-short" style="display: inline;"> We present a response to the 2018 Request for Information (RFI) from the NITRD, NCO, NSF regarding the &#34;Update to the 2016 National Artificial Intelligence Research and Development Strategic Plan.&#34; Through this document, we provide a response to the question of whether and how the National Artificial Intelligence Research and Development Strategic Plan (NAIRDSP) should be updated from the perspect&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('1911.05796v1-abstract-full').style.display = 'inline'; document.getElementById('1911.05796v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="1911.05796v1-abstract-full" style="display: none;"> We present a response to the 2018 Request for Information (RFI) from the NITRD, NCO, NSF regarding the &#34;Update to the 2016 National Artificial Intelligence Research and Development Strategic Plan.&#34; Through this document, we provide a response to the question of whether and how the National Artificial Intelligence Research and Development Strategic Plan (NAIRDSP) should be updated from the perspective of Fermilab, America&#39;s premier national laboratory for High Energy Physics (HEP). We believe the NAIRDSP should be extended in light of the rapid pace of development and innovation in the field of Artificial Intelligence (AI) since 2016, and present our recommendations below. AI has profoundly impacted many areas of human life, promising to dramatically reshape society --- e.g., economy, education, science --- in the coming years. We are still early in this process. 
It is critical to invest now in this technology to ensure it is safe and deployed ethically. Science and society both have a strong need for accuracy, efficiency, transparency, and accountability in algorithms, making investments in scientific AI particularly valuable. Thus far the US has been a leader in AI technologies, and we believe as a national Laboratory it is crucial to help maintain and extend this leadership. Moreover, investments in AI will be important for maintaining US leadership in the physical sciences. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('1911.05796v1-abstract-full').style.display = 'none'; document.getElementById('1911.05796v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 4 November, 2019; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> November 2019. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Report number:</span> FERMILAB-FN-1092-SCD </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/1910.05132">arXiv:1910.05132</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/1910.05132">pdf</a>, <a href="https://arxiv.org/format/1910.05132">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Numerical Analysis">math.NA</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">stat.ML</span> </div> </div> <p class="title is-5 mathjax"> Asymmetric Multiresolution Matrix Factorization </p> <p class="authors"> <span class="search-hit">Authors:</span> <a 
href="/search/cs?searchtype=author&amp;query=Mudrakarta%2C+P+K">Pramod Kaushik Mudrakarta</a>, <a href="/search/cs?searchtype=author&amp;query=Trivedi%2C+S">Shubhendu Trivedi</a>, <a href="/search/cs?searchtype=author&amp;query=Kondor%2C+R">Risi Kondor</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="1910.05132v1-abstract-short" style="display: inline;"> Multiresolution Matrix Factorization (MMF) was recently introduced as an alternative to the dominant low-rank paradigm in order to capture structure in matrices at multiple different scales. Using ideas from multiresolution analysis (MRA), MMF teased out hierarchical structure in symmetric matrices by constructing a sequence of wavelet bases. While effective for such matrices, there is plenty of d&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('1910.05132v1-abstract-full').style.display = 'inline'; document.getElementById('1910.05132v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="1910.05132v1-abstract-full" style="display: none;"> Multiresolution Matrix Factorization (MMF) was recently introduced as an alternative to the dominant low-rank paradigm in order to capture structure in matrices at multiple different scales. Using ideas from multiresolution analysis (MRA), MMF teased out hierarchical structure in symmetric matrices by constructing a sequence of wavelet bases. While effective for such matrices, there is plenty of data that is more naturally represented as nonsymmetric matrices (e.g. directed graphs), but nevertheless has similar hierarchical structure. In this paper, we explore techniques for extending MMF to any square matrix. We validate our approach on numerous matrix compression tasks, demonstrating its efficacy compared to low-rank methods. 
Moreover, we also show that a combined low-rank and MMF approach, which amounts to removing a small global-scale component of the matrix and then extracting hierarchical structure from the residual, is even more effective than each of the two complementary methods for matrix compression. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('1910.05132v1-abstract-full').style.display = 'none'; document.getElementById('1910.05132v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 10 October, 2019; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2019. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">preliminary work</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/1909.04648">arXiv:1909.04648</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/1909.04648">pdf</a>, <a href="https://arxiv.org/format/1909.04648">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Soft Condensed Matter">cond-mat.soft</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Disordered Systems and Neural Networks">cond-mat.dis-nn</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Materials Science">cond-mat.mtrl-sci</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">stat.ML</span> </div> </div> <p class="title is-5 mathjax"> Deep Learning for Automated Classification and Characterization of Amorphous Materials </p> <p 
class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Swanson%2C+K">Kirk Swanson</a>, <a href="/search/cs?searchtype=author&amp;query=Trivedi%2C+S">Shubhendu Trivedi</a>, <a href="/search/cs?searchtype=author&amp;query=Lequieu%2C+J">Joshua Lequieu</a>, <a href="/search/cs?searchtype=author&amp;query=Swanson%2C+K">Kyle Swanson</a>, <a href="/search/cs?searchtype=author&amp;query=Kondor%2C+R">Risi Kondor</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="1909.04648v1-abstract-short" style="display: inline;"> It is difficult to quantify structure-property relationships and to identify structural features of complex materials. The characterization of amorphous materials is especially challenging because their lack of long-range order makes it difficult to define structural metrics. In this work, we apply deep learning algorithms to accurately classify amorphous materials and characterize their structura&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('1909.04648v1-abstract-full').style.display = 'inline'; document.getElementById('1909.04648v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="1909.04648v1-abstract-full" style="display: none;"> It is difficult to quantify structure-property relationships and to identify structural features of complex materials. The characterization of amorphous materials is especially challenging because their lack of long-range order makes it difficult to define structural metrics. In this work, we apply deep learning algorithms to accurately classify amorphous materials and characterize their structural features. 
Specifically, we show that convolutional neural networks and message passing neural networks can classify two-dimensional liquids and liquid-cooled glasses from molecular dynamics simulations with greater than 0.98 AUC, with no a priori assumptions about local particle relationships, even when the liquids and glasses are prepared at the same inherent structure energy. Furthermore, we demonstrate that message passing neural networks surpass convolutional neural networks in this context in both accuracy and interpretability. We extract a clear interpretation of how message passing neural networks evaluate liquid and glass structures by using a self-attention mechanism. Using this interpretation, we derive three novel structural metrics that accurately characterize glass formation. The methods presented here provide us with a procedure to identify important structural features in materials that could be missed by standard techniques and give us a unique insight into how these neural networks process data. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('1909.04648v1-abstract-full').style.display = 'none'; document.getElementById('1909.04648v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 10 September, 2019; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> September 2019. 
</p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/1810.01483">arXiv:1810.01483</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/1810.01483">pdf</a>, <a href="https://arxiv.org/format/1810.01483">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Cosmology and Nongalactic Astrophysics">astro-ph.CO</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">stat.ML</span> </div> <div class="is-inline-block" style="margin-left: 0.5rem"> <div class="tags has-addons"> <span class="tag is-dark is-size-7">doi</span> <span class="tag is-light is-size-7"><a class="" href="https://doi.org/10.1016/j.ascom.2019.100307">10.1016/j.ascom.2019.100307 <i class="fa fa-external-link" aria-hidden="true"></i></a></span> </div> </div> </div> <p class="title is-5 mathjax"> DeepCMB: Lensing Reconstruction of the Cosmic Microwave Background with Deep Neural Networks </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Caldeira%2C+J">João Caldeira</a>, <a href="/search/cs?searchtype=author&amp;query=Wu%2C+W+L+K">W. L. Kimmy Wu</a>, <a href="/search/cs?searchtype=author&amp;query=Nord%2C+B">Brian Nord</a>, <a href="/search/cs?searchtype=author&amp;query=Avestruz%2C+C">Camille Avestruz</a>, <a href="/search/cs?searchtype=author&amp;query=Trivedi%2C+S">Shubhendu Trivedi</a>, <a href="/search/cs?searchtype=author&amp;query=Story%2C+K+T">Kyle T. 
Story</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="1810.01483v3-abstract-short" style="display: inline;"> Next-generation cosmic microwave background (CMB) experiments will have lower noise and therefore increased sensitivity, enabling improved constraints on fundamental physics parameters such as the sum of neutrino masses and the tensor-to-scalar ratio r. Achieving competitive constraints on these parameters requires high signal-to-noise extraction of the projected gravitational potential from the C&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('1810.01483v3-abstract-full').style.display = 'inline'; document.getElementById('1810.01483v3-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="1810.01483v3-abstract-full" style="display: none;"> Next-generation cosmic microwave background (CMB) experiments will have lower noise and therefore increased sensitivity, enabling improved constraints on fundamental physics parameters such as the sum of neutrino masses and the tensor-to-scalar ratio r. Achieving competitive constraints on these parameters requires high signal-to-noise extraction of the projected gravitational potential from the CMB maps. Standard methods for reconstructing the lensing potential employ the quadratic estimator (QE). However, the QE performs suboptimally at the low noise levels expected in upcoming experiments. Other methods, like maximum likelihood estimators (MLE), are under active development. In this work, we demonstrate reconstruction of the CMB lensing potential with deep convolutional neural networks (CNN) - ie, a ResUNet. The network is trained and tested on simulated data, and otherwise has no physical parametrization related to the physical processes of the CMB and gravitational lensing. 
We show that, over a wide range of angular scales, ResUNets recover the input gravitational potential with a higher signal-to-noise ratio than the QE method, reaching levels comparable to analytic approximations of MLE methods. We demonstrate that the network outputs quantifiably different lensing maps when given input CMB maps generated with different cosmologies. We also show we can use the reconstructed lensing map for cosmological parameter estimation. This application of CNN provides a few innovations at the intersection of cosmology and machine learning. First, while training and regressing on images, we predict a continuous-variable field rather than discrete classes. Second, we are able to establish uncertainty measures for the network output that are analogous to standard methods. We expect this approach to excel in capturing hard-to-model non-Gaussian astrophysical foreground and noise contributions. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('1810.01483v3-abstract-full').style.display = 'none'; document.getElementById('1810.01483v3-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 12 June, 2020; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 2 October, 2018; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2018. 
</p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">19 pages; LaTeX; 12 figures; changes to match published version</span> </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Report number:</span> FERMILAB-PUB-18-515-A-CD </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Journal ref:</span> Astronomy and Computing 28 100307 (2019) </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/1808.10078">arXiv:1808.10078</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/1808.10078">pdf</a>, <a href="https://arxiv.org/format/1808.10078">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">stat.ML</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> Discriminative Learning of Similarity and Group Equivariant Representations </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Trivedi%2C+S">Shubhendu Trivedi</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="1808.10078v2-abstract-short" style="display: inline;"> One of the most fundamental problems in machine learning is to compare examples: Given a pair of objects we want to return a value which indicates degree of (dis)similarity. Similarity is often task specific, and pre-defined distances can perform poorly, leading to work in metric learning. 
However, being able to learn a similarity-sensitive distance function also presupposes access to a rich, disc&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('1808.10078v2-abstract-full').style.display = 'inline'; document.getElementById('1808.10078v2-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="1808.10078v2-abstract-full" style="display: none;"> One of the most fundamental problems in machine learning is to compare examples: Given a pair of objects we want to return a value which indicates degree of (dis)similarity. Similarity is often task specific, and pre-defined distances can perform poorly, leading to work in metric learning. However, being able to learn a similarity-sensitive distance function also presupposes access to a rich, discriminative representation for the objects at hand. In this dissertation we present contributions towards both ends. In the first part of the thesis, assuming good representations for the data, we present a formulation for metric learning that makes a more direct attempt to optimize for the k-NN accuracy as compared to prior work. We also present extensions of this formulation to metric learning for kNN regression, asymmetric similarity learning and discriminative learning of Hamming distance. In the second part, we consider a situation where we are on a limited computational budget i.e. optimizing over a space of possible metrics would be infeasible, but access to a label aware distance metric is still desirable. We present a simple, and computationally inexpensive approach for estimating a well motivated metric that relies only on gradient estimates, discussing theoretical and experimental results. In the final part, we address representational issues, considering group equivariant convolutional neural networks (GCNNs). 
Equivariance to symmetry transformations is explicitly encoded in GCNNs; a classical CNN being the simplest example. In particular, we present a SO(3)-equivariant neural network architecture for spherical data, that operates entirely in Fourier space, while also providing a formalism for the design of fully Fourier neural networks that are equivariant to the action of any continuous compact group. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('1808.10078v2-abstract-full').style.display = 'none'; document.getElementById('1808.10078v2-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 30 August, 2022; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 29 August, 2018; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> August 2018. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">PhD thesis, September 2018 [Previous version had a compile error that was spotted recently, which is fixed. 
The uploaded version is the final thesis that was submitted in September 2018]</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/1806.09231">arXiv:1806.09231</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/1806.09231">pdf</a>, <a href="https://arxiv.org/format/1806.09231">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">stat.ML</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> Clebsch-Gordan Nets: a Fully Fourier Space Spherical Convolutional Neural Network </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Kondor%2C+R">Risi Kondor</a>, <a href="/search/cs?searchtype=author&amp;query=Lin%2C+Z">Zhen Lin</a>, <a href="/search/cs?searchtype=author&amp;query=Trivedi%2C+S">Shubhendu Trivedi</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="1806.09231v2-abstract-short" style="display: inline;"> Recent work by Cohen \emph{et al.} has achieved state-of-the-art results for learning spherical images in a rotation invariant way by using ideas from group representation theory and noncommutative harmonic analysis. In this paper we propose a generalization of this work that generally exhibits improved performance, but from an implementation point of view is actually simpler. 
An unusual feature of&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('1806.09231v2-abstract-full').style.display = 'inline'; document.getElementById('1806.09231v2-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="1806.09231v2-abstract-full" style="display: none;"> Recent work by Cohen \emph{et al.} has achieved state-of-the-art results for learning spherical images in a rotation invariant way by using ideas from group representation theory and noncommutative harmonic analysis. In this paper we propose a generalization of this work that generally exhibits improved performance, but from an implementation point of view is actually simpler. An unusual feature of the proposed architecture is that it uses the Clebsch--Gordan transform as its only source of nonlinearity, thus avoiding repeated forward and backward Fourier transforms. The underlying ideas of the paper generalize to constructing neural networks that are invariant to the action of other compact groups. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('1806.09231v2-abstract-full').style.display = 'none'; document.getElementById('1806.09231v2-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 10 November, 2018; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 24 June, 2018; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> June 2018. 
</p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Camera ready version for the proceedings of the thirty-second conference on Neural Information Processing Systems (NIPS), Montreal, Canada, 2018</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/1802.03690">arXiv:1802.03690</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/1802.03690">pdf</a>, <a href="https://arxiv.org/ps/1802.03690">ps</a>, <a href="https://arxiv.org/format/1802.03690">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">stat.ML</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> On the Generalization of Equivariance and Convolution in Neural Networks to the Action of Compact Groups </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Kondor%2C+R">Risi Kondor</a>, <a href="/search/cs?searchtype=author&amp;query=Trivedi%2C+S">Shubhendu Trivedi</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="1802.03690v3-abstract-short" style="display: inline;"> Convolutional neural networks have been extremely successful in the image recognition domain because they ensure equivariance to translations. There have been many recent attempts to generalize this framework to other domains, including graphs and data lying on manifolds. 
In this paper we give a rigorous, theoretical treatment of convolution and equivariance in neural networks with respect to not&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('1802.03690v3-abstract-full').style.display = 'inline'; document.getElementById('1802.03690v3-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="1802.03690v3-abstract-full" style="display: none;"> Convolutional neural networks have been extremely successful in the image recognition domain because they ensure equivariance to translations. There have been many recent attempts to generalize this framework to other domains, including graphs and data lying on manifolds. In this paper we give a rigorous, theoretical treatment of convolution and equivariance in neural networks with respect to not just translations, but the action of any compact group. Our main result is to prove that (given some natural constraints) convolutional structure is not just a sufficient, but also a necessary condition for equivariance to the action of a compact group. Our exposition makes use of concepts from representation theory and noncommutative harmonic analysis and derives new generalized convolution formulae. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('1802.03690v3-abstract-full').style.display = 'none'; document.getElementById('1802.03690v3-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 10 November, 2018; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 10 February, 2018; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2018. 
</p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Final version that appeared in the proceedings of the 35th International Conference on Machine Learning (ICML 2018), Stockholm, Sweden</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/1801.02144">arXiv:1801.02144</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/1801.02144">pdf</a>, <a href="https://arxiv.org/format/1801.02144">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> Covariant Compositional Networks For Learning Graphs </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Kondor%2C+R">Risi Kondor</a>, <a href="/search/cs?searchtype=author&amp;query=Son%2C+H+T">Hy Truong Son</a>, <a href="/search/cs?searchtype=author&amp;query=Pan%2C+H">Horace Pan</a>, <a href="/search/cs?searchtype=author&amp;query=Anderson%2C+B">Brandon Anderson</a>, <a href="/search/cs?searchtype=author&amp;query=Trivedi%2C+S">Shubhendu Trivedi</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="1801.02144v1-abstract-short" style="display: inline;"> Most existing neural networks for learning graphs address permutation invariance by conceiving of the network as a message passing scheme, where each node sums the feature vectors coming from its neighbors. 
We argue that this imposes a limitation on their representation power, and instead propose a new general architecture for representing objects consisting of a hierarchy of parts, which we call&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('1801.02144v1-abstract-full').style.display = 'inline'; document.getElementById('1801.02144v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="1801.02144v1-abstract-full" style="display: none;"> Most existing neural networks for learning graphs address permutation invariance by conceiving of the network as a message passing scheme, where each node sums the feature vectors coming from its neighbors. We argue that this imposes a limitation on their representation power, and instead propose a new general architecture for representing objects consisting of a hierarchy of parts, which we call Covariant Compositional Networks (CCNs). Here, covariance means that the activation of each neuron must transform in a specific way under permutations, similarly to steerability in CNNs. We achieve covariance by making each activation transform according to a tensor representation of the permutation group, and derive the corresponding tensor aggregation rules that each neuron must implement. Experiments show that CCNs can outperform competing methods on standard graph learning benchmarks. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('1801.02144v1-abstract-full').style.display = 'none'; document.getElementById('1801.02144v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 7 January, 2018; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> January 2018. 
</p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/1509.06163">arXiv:1509.06163</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/1509.06163">pdf</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> The Utility of Clustering in Prediction Tasks </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Trivedi%2C+S">Shubhendu Trivedi</a>, <a href="/search/cs?searchtype=author&amp;query=Pardos%2C+Z+A">Zachary A. Pardos</a>, <a href="/search/cs?searchtype=author&amp;query=Heffernan%2C+N+T">Neil T. Heffernan</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="1509.06163v1-abstract-short" style="display: inline;"> We explore the utility of clustering in reducing error in various prediction tasks. Previous work has hinted at the improvement in prediction accuracy attributed to clustering algorithms if used to pre-process the data. In this work we more deeply investigate the direct utility of using clustering to improve prediction accuracy and provide explanations for why this may be so. We look at a number o&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('1509.06163v1-abstract-full').style.display = 'inline'; document.getElementById('1509.06163v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="1509.06163v1-abstract-full" style="display: none;"> We explore the utility of clustering in reducing error in various prediction tasks. 
Previous work has hinted at the improvement in prediction accuracy attributed to clustering algorithms if used to pre-process the data. In this work we more deeply investigate the direct utility of using clustering to improve prediction accuracy and provide explanations for why this may be so. We look at a number of datasets, run k-means at different scales and for each scale we train predictors. This produces k sets of predictions. These predictions are then combined by a naïve ensemble. We observed that this use of a predictor in conjunction with clustering improved the prediction accuracy in most datasets. We believe this indicates the predictive utility of exploiting structure in the data and the data compression handed over by clustering. We also found that using this method improves upon the prediction of even a Random Forests predictor which suggests this method is providing a novel, and useful source of variance in the prediction process. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('1509.06163v1-abstract-full').style.display = 'none'; document.getElementById('1509.06163v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 21 September, 2015; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> September 2015. 
</p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">An experimental research report, dated 11 September 2011</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/1405.6216">arXiv:1405.6216</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/1405.6216">pdf</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Networking and Internet Architecture">cs.NI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Cryptography and Security">cs.CR</span> </div> <div class="is-inline-block" style="margin-left: 0.5rem"> <div class="tags has-addons"> <span class="tag is-dark is-size-7">doi</span> <span class="tag is-light is-size-7"><a class="" href="https://doi.org/10.5121/ijcnc.2014.6102">10.5121/ijcnc.2014.6102 <i class="fa fa-external-link" aria-hidden="true"></i></a></span> </div> </div> </div> <p class="title is-5 mathjax"> NDTAODV: Neighbor Defense Technique for Ad Hoc On-Demand Distance Vector(AODV) to mitigate flood attack in MANETS </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Aggarwal%2C+A">Akshai Aggarwal</a>, <a href="/search/cs?searchtype=author&amp;query=Gandhi%2C+S">Savita Gandhi</a>, <a href="/search/cs?searchtype=author&amp;query=Chaubey%2C+N">Nirbhay Chaubey</a>, <a href="/search/cs?searchtype=author&amp;query=Tada%2C+N">Naren Tada</a>, <a href="/search/cs?searchtype=author&amp;query=Trivedi%2C+S">Srushti Trivedi</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="1405.6216v1-abstract-short" style="display: inline;"> Mobile Ad Hoc Networks (MANETs) are collections of mobile nodes that can 
communicate with one another using multihop wireless links. MANETs are often deployed in the environments, where there is no fixed infrastructure and centralized management. The nodes of mobile ad hoc networks are susceptible to compromise. In such a scenario, designing an efficient, reliable and secure routing protocol has b&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('1405.6216v1-abstract-full').style.display = 'inline'; document.getElementById('1405.6216v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="1405.6216v1-abstract-full" style="display: none;"> Mobile Ad Hoc Networks (MANETs) are collections of mobile nodes that can communicate with one another using multihop wireless links. MANETs are often deployed in the environments, where there is no fixed infrastructure and centralized management. The nodes of mobile ad hoc networks are susceptible to compromise. In such a scenario, designing an efficient, reliable and secure routing protocol has been a major challenge over the last many years. The routing protocol Ad hoc On-demand Distance Vector (AODV) has no security measures in-built in it. It is vulnerable to many types of routing attacks. The flood attack is one of them. In this paper, we propose a simple and effective technique to secure Ad hoc Ondemand Distance Vector (AODV) routing protocol against flood attacks. To deal with a flood attack, we have proposed Neighbor Defense Technique for Ad hoc On-demand Distance Vector (NDTAODV). This makes AODV more robust. The proposed technique has been designed to isolate the flood attacker with the use of timers, peak value and hello alarm technique. We have simulated our work in Network Simulator NS-2.33 (NS-2) with different pause times by way of different number of malicious nodes. 
We have compared the performance of NDTAODV with the AODV in normal situation as well as in the presence of malicious attacks. We have considered Packet Delivery Fraction (PDF), Average Throughput (AT) and Normalized Routing Load (NRL) for comparing the performance of NDTAODV and AODV. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('1405.6216v1-abstract-full').style.display = 'none'; document.getElementById('1405.6216v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 10 February, 2014; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> May 2014. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">14 Pages, 13 Figure. arXiv admin note: text overlap with arXiv:1202.4628 by other authors</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/1209.6540">arXiv:1209.6540</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/1209.6540">pdf</a>, <a href="https://arxiv.org/format/1209.6540">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Combinatorics">math.CO</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Data Structures and Algorithms">cs.DS</span> </div> </div> <p class="title is-5 mathjax"> A Practical Regularity Partitioning Algorithm and its Applications in Clustering </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=S%C3%A1rk%C3%B6zy%2C+G+N">Gábor N. 
Sárközy</a>, <a href="/search/cs?searchtype=author&amp;query=Song%2C+F">Fei Song</a>, <a href="/search/cs?searchtype=author&amp;query=Szemer%C3%A9di%2C+E">Endre Szemerédi</a>, <a href="/search/cs?searchtype=author&amp;query=Trivedi%2C+S">Shubhendu Trivedi</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="1209.6540v1-abstract-short" style="display: inline;"> In this paper we introduce a new clustering technique called Regularity Clustering. This new technique is based on the practical variants of the two constructive versions of the Regularity Lemma, a very useful tool in graph theory. The lemma claims that every graph can be partitioned into pseudo-random graphs. While the Regularity Lemma has become very important in proving theoretical results, it&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('1209.6540v1-abstract-full').style.display = 'inline'; document.getElementById('1209.6540v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="1209.6540v1-abstract-full" style="display: none;"> In this paper we introduce a new clustering technique called Regularity Clustering. This new technique is based on the practical variants of the two constructive versions of the Regularity Lemma, a very useful tool in graph theory. The lemma claims that every graph can be partitioned into pseudo-random graphs. While the Regularity Lemma has become very important in proving theoretical results, it has no direct practical applications so far. An important reason for this lack of practical applications is that the graph under consideration has to be astronomically large. This requirement makes its application restrictive in practice where graphs typically are much smaller. 
In this paper we propose modifications of the constructive versions of the Regularity Lemma that work for smaller graphs as well. We call this the Practical Regularity partitioning algorithm. The partition obtained by this is used to build the reduced graph which can be viewed as a compressed representation of the original graph. Then we apply a pairwise clustering method such as spectral clustering on this reduced graph to get a clustering of the original graph that we call Regularity Clustering. We present results of using Regularity Clustering on a number of benchmark datasets and compare them with standard clustering techniques, such as $k$-means and spectral clustering. These empirical results are very encouraging. Thus in this paper we report an attempt to harness the power of the Regularity Lemma for real-world applications. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('1209.6540v1-abstract-full').style.display = 'none'; document.getElementById('1209.6540v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 28 September, 2012; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> September 2012. 
</p> </li> </ol> <div class="is-hidden-tablet"> <!-- feedback for mobile only --> <span class="help" style="display: inline-block;"><a href="https://github.com/arXiv/arxiv-search/releases">Search v0.5.6 released 2020-02-24</a>&nbsp;&nbsp;</span> </div> </div> </main> <footer> <div class="columns is-desktop" role="navigation" aria-label="Secondary"> <!-- MetaColumn 1 --> <div class="column"> <div class="columns"> <div class="column"> <ul class="nav-spaced"> <li><a href="https://info.arxiv.org/about">About</a></li> <li><a href="https://info.arxiv.org/help">Help</a></li> </ul> </div> <div class="column"> <ul class="nav-spaced"> <li> <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 512 512" class="icon filter-black" role="presentation"><title>contact arXiv</title><desc>Click here to contact arXiv</desc><path d="M502.3 190.8c3.9-3.1 9.7-.2 9.7 4.7V400c0 26.5-21.5 48-48 48H48c-26.5 0-48-21.5-48-48V195.6c0-5 5.7-7.8 9.7-4.7 22.4 17.4 52.1 39.5 154.1 113.6 21.1 15.4 56.7 47.8 92.2 47.6 35.7.3 72-32.8 92.3-47.6 102-74.1 131.6-96.3 154-113.7zM256 320c23.2.4 56.6-29.2 73.4-41.4 132.7-96.3 142.8-104.7 173.4-128.7 5.8-4.5 9.2-11.5 9.2-18.9v-19c0-26.5-21.5-48-48-48H48C21.5 64 0 85.5 0 112v19c0 7.4 3.4 14.3 9.2 18.9 30.6 23.9 40.7 32.4 173.4 128.7 16.8 12.2 50.2 41.8 73.4 41.4z"/></svg> <a href="https://info.arxiv.org/help/contact.html"> Contact</a> </li> <li> <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 512 512" class="icon filter-black" role="presentation"><title>subscribe to arXiv mailings</title><desc>Click here to subscribe</desc><path d="M476 3.2L12.5 270.6c-18.1 10.4-15.8 35.6 2.2 43.2L121 358.4l287.3-253.2c5.5-4.9 13.3 2.6 8.6 8.3L176 407v80.5c0 23.6 28.5 32.9 42.5 15.8L282 426l124.6 52.2c14.2 6 30.4-2.9 33-18.2l72-432C515 7.8 493.3-6.8 476 3.2z"/></svg> <a href="https://info.arxiv.org/help/subscribe"> Subscribe</a> </li> </ul> </div> </div> </div> <!-- end MetaColumn 1 --> <!-- MetaColumn 2 --> <div class="column"> <div class="columns"> <div class="column"> 
<ul class="nav-spaced"> <li><a href="https://info.arxiv.org/help/license/index.html">Copyright</a></li> <li><a href="https://info.arxiv.org/help/policies/privacy_policy.html">Privacy Policy</a></li> </ul> </div> <div class="column sorry-app-links"> <ul class="nav-spaced"> <li><a href="https://info.arxiv.org/help/web_accessibility.html">Web Accessibility Assistance</a></li> <li> <p class="help"> <a class="a11y-main-link" href="https://status.arxiv.org" target="_blank">arXiv Operational Status <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 256 512" class="icon filter-dark_grey" role="presentation"><path d="M224.3 273l-136 136c-9.4 9.4-24.6 9.4-33.9 0l-22.6-22.6c-9.4-9.4-9.4-24.6 0-33.9l96.4-96.4-96.4-96.4c-9.4-9.4-9.4-24.6 0-33.9L54.3 103c9.4-9.4 24.6-9.4 33.9 0l136 136c9.5 9.4 9.5 24.6.1 34z"/></svg></a><br> Get status notifications via <a class="is-link" href="https://subscribe.sorryapp.com/24846f03/email/new" target="_blank"><svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 512 512" class="icon filter-black" role="presentation"><path d="M502.3 190.8c3.9-3.1 9.7-.2 9.7 4.7V400c0 26.5-21.5 48-48 48H48c-26.5 0-48-21.5-48-48V195.6c0-5 5.7-7.8 9.7-4.7 22.4 17.4 52.1 39.5 154.1 113.6 21.1 15.4 56.7 47.8 92.2 47.6 35.7.3 72-32.8 92.3-47.6 102-74.1 131.6-96.3 154-113.7zM256 320c23.2.4 56.6-29.2 73.4-41.4 132.7-96.3 142.8-104.7 173.4-128.7 5.8-4.5 9.2-11.5 9.2-18.9v-19c0-26.5-21.5-48-48-48H48C21.5 64 0 85.5 0 112v19c0 7.4 3.4 14.3 9.2 18.9 30.6 23.9 40.7 32.4 173.4 128.7 16.8 12.2 50.2 41.8 73.4 41.4z"/></svg>email</a> or <a class="is-link" href="https://subscribe.sorryapp.com/24846f03/slack/new" target="_blank"><svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 448 512" class="icon filter-black" role="presentation"><path d="M94.12 315.1c0 25.9-21.16 47.06-47.06 47.06S0 341 0 315.1c0-25.9 21.16-47.06 47.06-47.06h47.06v47.06zm23.72 0c0-25.9 21.16-47.06 47.06-47.06s47.06 21.16 47.06 47.06v117.84c0 25.9-21.16 47.06-47.06 
47.06s-47.06-21.16-47.06-47.06V315.1zm47.06-188.98c-25.9 0-47.06-21.16-47.06-47.06S139 32 164.9 32s47.06 21.16 47.06 47.06v47.06H164.9zm0 23.72c25.9 0 47.06 21.16 47.06 47.06s-21.16 47.06-47.06 47.06H47.06C21.16 243.96 0 222.8 0 196.9s21.16-47.06 47.06-47.06H164.9zm188.98 47.06c0-25.9 21.16-47.06 47.06-47.06 25.9 0 47.06 21.16 47.06 47.06s-21.16 47.06-47.06 47.06h-47.06V196.9zm-23.72 0c0 25.9-21.16 47.06-47.06 47.06-25.9 0-47.06-21.16-47.06-47.06V79.06c0-25.9 21.16-47.06 47.06-47.06 25.9 0 47.06 21.16 47.06 47.06V196.9zM283.1 385.88c25.9 0 47.06 21.16 47.06 47.06 0 25.9-21.16 47.06-47.06 47.06-25.9 0-47.06-21.16-47.06-47.06v-47.06h47.06zm0-23.72c-25.9 0-47.06-21.16-47.06-47.06 0-25.9 21.16-47.06 47.06-47.06h117.84c25.9 0 47.06 21.16 47.06 47.06 0 25.9-21.16 47.06-47.06 47.06H283.1z"/></svg>slack</a> </p> </li> </ul> </div> </div> </div> <!-- end MetaColumn 2 --> </div> </footer> <script src="https://static.arxiv.org/static/base/1.0.0a5/js/member_acknowledgement.js"></script> </body> </html>

Pages: 1 2 3 4 5 6 7 8 9 10