Search | arXiv e-print repository
<a href="https://info.arxiv.org/about/donate.html">Donate</a></span></p></div> </div> <!-- contains arXiv identity and search bar --> <div class="identity level is-marginless"> <div class="level-left"> <div class="level-item"> <a class="arxiv" href="https://arxiv.org/" aria-label="arxiv-logo"> <img src="https://static.arxiv.org/static/base/1.0.0a5/images/arxiv-logo-one-color-white.svg" aria-label="logo" alt="arxiv logo" width="85" style="width:85px;"/> </a> </div> </div> <div class="search-block level-right"> <form class="level-item mini-search" method="GET" action="https://arxiv.org/search"> <div class="field has-addons"> <div class="control"> <input class="input is-small" type="text" name="query" placeholder="Search..." aria-label="Search term or terms" /> <p class="help"><a href="https://info.arxiv.org/help">Help</a> | <a href="https://arxiv.org/search/advanced">Advanced Search</a></p> </div> <div class="control"> <div class="select is-small"> <select name="searchtype" aria-label="Field to search"> <option value="all" selected="selected">All fields</option> <option value="title">Title</option> <option value="author">Author</option> <option value="abstract">Abstract</option> <option value="comments">Comments</option> <option value="journal_ref">Journal reference</option> <option value="acm_class">ACM classification</option> <option value="msc_class">MSC classification</option> <option value="report_num">Report number</option> <option value="paper_id">arXiv identifier</option> <option value="doi">DOI</option> <option value="orcid">ORCID</option> <option value="author_id">arXiv author ID</option> <option value="help">Help pages</option> <option value="full_text">Full text</option> </select> </div> </div> <input type="hidden" name="source" value="header"> <button class="button is-small is-cul-darker">Search</button> </div> </form> </div> </div> <!-- closes identity --> <div class="container"> <div class="user-tools is-size-7 has-text-right has-text-weight-bold" role="navigation" aria-label="User menu"> <a href="https://arxiv.org/login">Login</a> </div> </div> </header> <main class="container" id="main-container"> <div class="level is-marginless"> <div class="level-left"> <h1 class="title is-clearfix"> Showing 1–50 of 136 results for author: <span class="mathjax">Taylor, G</span> </h1> </div> <div class="level-right is-hidden-mobile"> <!-- feedback for mobile is moved to footer --> <span class="help" style="display: inline-block;"><a href="https://github.com/arXiv/arxiv-search/releases">Search v0.5.6 released 2020-02-24</a> </span> </div> </div> <div class="content"> <form method="GET" action="/search/cs" aria-role="search"> Searching in archive <strong>cs</strong>. <a href="/search/?searchtype=author&query=Taylor%2C+G">Search in all archives.</a> <div class="field has-addons-tablet"> <div class="control is-expanded"> <label for="query" class="hidden-label">Search term or terms</label> <input class="input is-medium" id="query" name="query" placeholder="Search term..." 
type="text" value="Taylor, G"> </div> <div class="select control is-medium"> <label class="is-hidden" for="searchtype">Field</label> <select class="is-medium" id="searchtype" name="searchtype"><option value="all">All fields</option><option value="title">Title</option><option selected value="author">Author(s)</option><option value="abstract">Abstract</option><option value="comments">Comments</option><option value="journal_ref">Journal reference</option><option value="acm_class">ACM classification</option><option value="msc_class">MSC classification</option><option value="report_num">Report number</option><option value="paper_id">arXiv identifier</option><option value="doi">DOI</option><option value="orcid">ORCID</option><option value="license">License (URI)</option><option value="author_id">arXiv author ID</option><option value="help">Help pages</option><option value="full_text">Full text</option></select> </div> <div class="control"> <button class="button is-link is-medium">Search</button> </div> </div> <div class="field"> <div class="control is-size-7"> <label class="radio"> <input checked id="abstracts-0" name="abstracts" type="radio" value="show"> Show abstracts </label> <label class="radio"> <input id="abstracts-1" name="abstracts" type="radio" value="hide"> Hide abstracts </label> </div> </div> <div class="is-clearfix" style="height: 2.5em"> <div class="is-pulled-right"> <a href="/search/advanced?terms-0-term=Taylor%2C+G&terms-0-field=author&size=50&order=-announced_date_first">Advanced Search</a> </div> </div> <input type="hidden" name="order" value="-announced_date_first"> <input type="hidden" name="size" value="50"> </form> <div class="level breathe-horizontal"> <div class="level-left"> <form method="GET" action="/search/"> <div style="display: none;"> <select id="searchtype" name="searchtype"><option value="all">All fields</option><option value="title">Title</option><option selected value="author">Author(s)</option><option value="abstract">Abstract</option><option value="comments">Comments</option><option value="journal_ref">Journal reference</option><option value="acm_class">ACM classification</option><option value="msc_class">MSC classification</option><option value="report_num">Report number</option><option value="paper_id">arXiv identifier</option><option value="doi">DOI</option><option value="orcid">ORCID</option><option value="license">License (URI)</option><option value="author_id">arXiv author ID</option><option value="help">Help pages</option><option value="full_text">Full text</option></select> <input id="query" name="query" type="text" value="Taylor, G"> <ul id="abstracts"><li><input checked id="abstracts-0" name="abstracts" type="radio" value="show"> <label for="abstracts-0">Show abstracts</label></li><li><input id="abstracts-1" name="abstracts" type="radio" value="hide"> <label for="abstracts-1">Hide abstracts</label></li></ul> </div> <div class="box field is-grouped is-grouped-multiline level-item"> <div class="control"> <span class="select is-small"> <select id="size" name="size"><option value="25">25</option><option selected value="50">50</option><option value="100">100</option><option value="200">200</option></select> </span> <label for="size">results per page</label>. 
</div> <div class="control"> <label for="order">Sort results by</label> <span class="select is-small"> <select id="order" name="order"><option selected value="-announced_date_first">Announcement date (newest first)</option><option value="announced_date_first">Announcement date (oldest first)</option><option value="-submitted_date">Submission date (newest first)</option><option value="submitted_date">Submission date (oldest first)</option><option value="">Relevance</option></select> </span> </div> <div class="control"> <button class="button is-small is-link">Go</button> </div> </div> </form> </div> </div> <nav class="pagination is-small is-centered breathe-horizontal" role="navigation" aria-label="pagination"> <a href="" class="pagination-previous is-invisible">Previous </a> <a href="/search/?searchtype=author&query=Taylor%2C+G&start=50" class="pagination-next" >Next </a> <ul class="pagination-list"> <li> <a href="/search/?searchtype=author&query=Taylor%2C+G&start=0" class="pagination-link is-current" aria-label="Goto page 1">1 </a> </li> <li> <a href="/search/?searchtype=author&query=Taylor%2C+G&start=50" class="pagination-link " aria-label="Page 2" aria-current="page">2 </a> </li> <li> <a href="/search/?searchtype=author&query=Taylor%2C+G&start=100" class="pagination-link " aria-label="Page 3" aria-current="page">3 </a> </li> </ul> </nav> <ol class="breathe-horizontal" start="1"> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2410.15416">arXiv:2410.15416</a> <span> [<a href="https://arxiv.org/pdf/2410.15416">pdf</a>, <a href="https://arxiv.org/format/2410.15416">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> Dynamic Contrastive Learning for Time Series Representation </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Shamba%2C+A">Abdul-Kazeem Shamba</a>, <a href="/search/cs?searchtype=author&query=Bach%2C+K">Kerstin Bach</a>, <a href="/search/cs?searchtype=author&query=Taylor%2C+G">Gavin Taylor</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2410.15416v1-abstract-short" style="display: inline;"> Understanding events in time series is an important task in a variety of contexts. However, human analysis and labeling are expensive and time-consuming. Therefore, it is advantageous to learn embeddings for moments in time series in an unsupervised way, which allows for good performance in classification or detection tasks after later minimal human labeling. In this paper, we propose dynamic cont… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.15416v1-abstract-full').style.display = 'inline'; document.getElementById('2410.15416v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2410.15416v1-abstract-full" style="display: none;"> Understanding events in time series is an important task in a variety of contexts. However, human analysis and labeling are expensive and time-consuming. 
   Therefore, it is advantageous to learn embeddings for moments in time series in an unsupervised way, which allows for good performance in classification or detection tasks after minimal subsequent human labeling. In this paper, we propose dynamic contrastive learning (DynaCL), an unsupervised contrastive representation learning framework for time series that uses temporally adjacent steps to define positive pairs. DynaCL adopts N-pair loss to dynamically treat all samples in a batch as positive or negative pairs, enabling efficient training and addressing the challenges of complicated sampling of positives. We demonstrate that DynaCL embeds instances from time series into semantically meaningful clusters, which allows superior performance on downstream tasks on a variety of public time series datasets. Our findings also reveal that high scores on unsupervised clustering metrics do not guarantee that the representations are useful in downstream tasks.
   Submitted 20 October, 2024; originally announced October 2024.
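A minimal sketch of the positive-pair construction this abstract describes: temporally adjacent steps act as positives, and an N-pair/InfoNCE-style loss treats the rest of the batch as negatives. The encoder, dimensions, and temperature are illustrative assumptions, not the authors' implementation.

```python
import torch
import torch.nn.functional as F

def adjacent_step_contrastive_loss(z_t, z_next, temperature=0.1):
    """z_t, z_next: (B, D) embeddings of steps t and t+1 for B series."""
    z_t = F.normalize(z_t, dim=1)
    z_next = F.normalize(z_next, dim=1)
    logits = z_t @ z_next.T / temperature   # (B, B) similarity matrix
    labels = torch.arange(z_t.size(0))      # positives on the diagonal
    return F.cross_entropy(logits, labels)

# Toy usage: embed two adjacent time steps of a batch of series.
encoder = torch.nn.Sequential(torch.nn.Linear(16, 64), torch.nn.ReLU(),
                              torch.nn.Linear(64, 32))
x = torch.randn(8, 10, 16)                  # (batch, time, features)
loss = adjacent_step_contrastive_loss(encoder(x[:, 4]), encoder(x[:, 5]))
loss.backward()
```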
2. arXiv:2409.11923 [pdf, other] cs.CV
   Agglomerative Token Clustering
   Authors: Joakim Bruslund Haurum, Sergio Escalera, Graham W. Taylor, Thomas B. Moeslund
   Abstract: We present Agglomerative Token Clustering (ATC), a novel token merging method that consistently outperforms previous token merging and pruning methods across image classification, image synthesis, and object detection & segmentation tasks. ATC merges clusters through bottom-up hierarchical clustering, without the introduction of extra learnable parameters. We find that ATC achieves state-of-the-art performance across all tasks, and can even perform on par with prior state-of-the-art when applied off-the-shelf, i.e., without fine-tuning. ATC is particularly effective when applied with low keep rates, where only a small fraction of tokens are kept and retaining task performance is especially difficult.
   Submitted 18 September, 2024; originally announced September 2024.
   Comments: ECCV 2024. Project webpage at https://vap.aau.dk/atc/
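A hedged sketch of the core operation described above (not the paper's implementation): merge token embeddings by bottom-up agglomerative clustering and replace each cluster with its mean, introducing no learnable parameters. The keep rate and linkage are assumptions.

```python
import torch
from sklearn.cluster import AgglomerativeClustering

def agglomerative_token_merge(tokens, keep_rate=0.25):
    """tokens: (N, D) token embeddings -> (k, D) merged cluster means."""
    n, d = tokens.shape
    k = max(1, int(n * keep_rate))                  # clusters kept
    clusterer = AgglomerativeClustering(n_clusters=k, linkage="average")
    labels = torch.as_tensor(clusterer.fit_predict(tokens.detach().numpy()))
    merged = torch.zeros(k, d).index_add_(0, labels, tokens)
    counts = torch.zeros(k).index_add_(0, labels, torch.ones(n))
    return merged / counts.unsqueeze(1)             # per-cluster mean

print(agglomerative_token_merge(torch.randn(196, 768)).shape)  # (49, 768)
```

With a 0.25 keep rate, the 196 tokens of a standard ViT collapse to 49 cluster means.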
3. arXiv:2406.15556 [pdf, other] cs.CV
   Open-Vocabulary Temporal Action Localization using Multimodal Guidance
   Authors: Akshita Gupta, Aditya Arora, Sanath Narayan, Salman Khan, Fahad Shahbaz Khan, Graham W. Taylor
   Abstract: Open-Vocabulary Temporal Action Localization (OVTAL) enables a model to recognize any desired action category in videos without the need to explicitly curate training data for all categories. However, this flexibility poses significant challenges, as the model must recognize not only the action categories seen during training but also novel categories specified at inference. Unlike standard temporal action localization, where training and test categories are predetermined, OVTAL requires understanding contextual cues that reveal the semantics of novel categories. To address these challenges, we introduce OVFormer, a novel open-vocabulary framework extending ActionFormer with three key contributions. First, we employ task-specific prompts as input to a large language model to obtain rich class-specific descriptions for action categories. Second, we introduce a cross-attention mechanism to learn the alignment between class representations and frame-level video features, yielding multimodal-guided features. Third, we propose a two-stage training strategy that includes training on a larger vocabulary dataset and fine-tuning on downstream data to generalize to novel categories. OVFormer extends existing TAL methods to open-vocabulary settings. Comprehensive evaluations on the THUMOS14 and ActivityNet-1.3 benchmarks demonstrate the effectiveness of our method. Code and pretrained models will be publicly released.
   Submitted 21 June, 2024; originally announced June 2024.
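The cross-attention ingredient can be pictured in a few lines of PyTorch; the dimensions and the direction of attention (frames attending to class-description embeddings) are assumptions for illustration, not OVFormer's exact design.

```python
import torch

frames = torch.randn(1, 128, 512)    # (batch, frames, feature dim)
classes = torch.randn(1, 20, 512)    # (batch, classes, text-embedding dim)

attn = torch.nn.MultiheadAttention(embed_dim=512, num_heads=8,
                                   batch_first=True)
# Frame features attend to class descriptions -> multimodal-guided features.
guided, _ = attn(query=frames, key=classes, value=classes)
print(guided.shape)                  # torch.Size([1, 128, 512])
```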
4. arXiv:2406.12723 [pdf, other] cs.LG
   BIOSCAN-5M: A Multimodal Dataset for Insect Biodiversity
   Authors: Zahra Gharaee, Scott C. Lowe, ZeMing Gong, Pablo Millan Arias, Nicholas Pellegrino, Austin T. Wang, Joakim Bruslund Haurum, Iuliia Zarubiieva, Lila Kari, Dirk Steinke, Graham W. Taylor, Paul Fieguth, Angel X. Chang
   Abstract: As part of an ongoing worldwide effort to comprehend and monitor insect biodiversity, this paper presents the BIOSCAN-5M Insect dataset to the machine learning community and establishes several benchmark tasks. BIOSCAN-5M is a comprehensive dataset containing multi-modal information for over 5 million insect specimens, and it significantly expands existing image-based biological datasets by including taxonomic labels, raw nucleotide barcode sequences, assigned barcode index numbers, and geographical and size information. We propose three benchmark experiments to demonstrate the impact of the multi-modal data types on classification and clustering accuracy. First, we pretrain a masked language model on the DNA barcode sequences of the BIOSCAN-5M dataset, and demonstrate the impact of using this large reference library on species- and genus-level classification performance. Second, we propose a zero-shot transfer learning task applied to images and DNA barcodes to cluster feature embeddings obtained from self-supervised learning, to investigate whether meaningful clusters can be derived from these representation embeddings. Third, we benchmark multi-modality by performing contrastive learning on DNA barcodes, image data, and taxonomic information. This yields a general shared embedding space enabling taxonomic classification using multiple types of information and modalities. The code repository of the BIOSCAN-5M Insect dataset is available at https://github.com/bioscan-ml/BIOSCAN-5M.
   Submitted 12 November, 2024; v1 submitted 18 June, 2024; originally announced June 2024.
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2406.12723v4-abstract-full').style.display = 'none'; document.getElementById('2406.12723v4-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 12 November, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 18 June, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> June 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2406.02465">arXiv:2406.02465</a> <span> [<a href="https://arxiv.org/pdf/2406.02465">pdf</a>, <a href="https://arxiv.org/format/2406.02465">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> An Empirical Study into Clustering of Unseen Datasets with Self-Supervised Encoders </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Lowe%2C+S+C">Scott C. Lowe</a>, <a href="/search/cs?searchtype=author&query=Haurum%2C+J+B">Joakim Bruslund Haurum</a>, <a href="/search/cs?searchtype=author&query=Oore%2C+S">Sageev Oore</a>, <a href="/search/cs?searchtype=author&query=Moeslund%2C+T+B">Thomas B. Moeslund</a>, <a href="/search/cs?searchtype=author&query=Taylor%2C+G+W">Graham W. Taylor</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2406.02465v1-abstract-short" style="display: inline;"> Can pretrained models generalize to new datasets without any retraining? We deploy pretrained image models on datasets they were not trained for, and investigate whether their embeddings form meaningful clusters. Our suite of benchmarking experiments use encoders pretrained solely on ImageNet-1k with either supervised or self-supervised training techniques, deployed on image datasets that were not… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2406.02465v1-abstract-full').style.display = 'inline'; document.getElementById('2406.02465v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2406.02465v1-abstract-full" style="display: none;"> Can pretrained models generalize to new datasets without any retraining? We deploy pretrained image models on datasets they were not trained for, and investigate whether their embeddings form meaningful clusters. Our suite of benchmarking experiments use encoders pretrained solely on ImageNet-1k with either supervised or self-supervised training techniques, deployed on image datasets that were not seen during training, and clustered with conventional clustering algorithms. This evaluation provides new insights into the embeddings of self-supervised models, which prioritize different features to supervised models. 
   Supervised encoders typically offer more utility than SSL encoders within the training domain, and vice versa far outside of it; fine-tuned encoders, however, demonstrate the opposite trend. Clustering provides a way to evaluate the utility of self-supervised learned representations that is orthogonal to existing methods such as kNN. Additionally, we find that the silhouette score, when measured in a UMAP-reduced space, is highly correlated with clustering performance, and can therefore be used as a proxy for clustering performance on data with no ground-truth labels. Our code implementation is available at https://github.com/scottclowe/zs-ssl-clustering/.
   Submitted 4 June, 2024; originally announced June 2024.
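The label-free proxy reported above can be sketched as follows; the third-party umap-learn package and all parameter choices are assumptions for illustration.

```python
import numpy as np
import umap                                   # pip install umap-learn
from sklearn.cluster import KMeans
from sklearn.metrics import silhouette_score

embeddings = np.random.randn(500, 768)        # stand-in for encoder outputs
reduced = umap.UMAP(n_components=2).fit_transform(embeddings)
labels = KMeans(n_clusters=10, n_init=10).fit_predict(reduced)
print("silhouette proxy:", silhouette_score(reduced, labels))
```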
6. arXiv:2406.01416 [pdf, other] cs.LG, stat.ML
   Adapting Conformal Prediction to Distribution Shifts Without Labels
   Authors: Kevin Kasa, Zhiyu Zhang, Heng Yang, Graham W. Taylor
   Abstract: Conformal prediction (CP) enables machine learning models to output prediction sets with a guaranteed coverage rate, assuming exchangeable data. Unfortunately, the exchangeability assumption is frequently violated due to distribution shifts in practice, and the challenge is often compounded by the lack of ground truth labels at test time. Focusing on classification in this paper, our goal is to improve the quality of CP-generated prediction sets using only unlabeled data from the test domain. This is achieved by two new methods, ECP and EACP, which adjust the score function in CP according to the base model's uncertainty on the unlabeled test data. Through extensive experiments on a number of large-scale datasets and neural network architectures, we show that our methods provide consistent improvement over existing baselines and nearly match the performance of supervised algorithms.
   Submitted 3 June, 2024; originally announced June 2024.
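For background, a minimal sketch of standard split conformal prediction for classification, the procedure ECP and EACP build on (their uncertainty-based score adjustment is not reproduced here); the score function and data are placeholders.

```python
import numpy as np

def calibrate(cal_probs, cal_labels, alpha=0.1):
    """cal_probs: (n, K) softmax outputs; returns the conformal quantile."""
    n = len(cal_labels)
    scores = 1.0 - cal_probs[np.arange(n), cal_labels]
    level = np.ceil((n + 1) * (1 - alpha)) / n
    return np.quantile(scores, min(level, 1.0), method="higher")

def prediction_sets(test_probs, q_hat):
    """Every class whose score clears the threshold enters the set."""
    return [np.flatnonzero(1.0 - p <= q_hat) for p in test_probs]

rng = np.random.default_rng(0)
probs = rng.dirichlet(np.ones(5), size=200)   # placeholder model outputs
labels = rng.integers(0, 5, size=200)
q_hat = calibrate(probs, labels)
print(prediction_sets(probs[:3], q_hat))
```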
7. arXiv:2405.17537 [pdf, other] cs.AI, cs.CL, cs.CV
   CLIBD: Bridging Vision and Genomics for Biodiversity Monitoring at Scale
   Authors: ZeMing Gong, Austin T. Wang, Xiaoliang Huo, Joakim Bruslund Haurum, Scott C. Lowe, Graham W. Taylor, Angel X. Chang
   Abstract: Measuring biodiversity is crucial for understanding ecosystem health. While prior works have developed machine learning models for taxonomic classification of photographic images and DNA separately, in this work, we introduce a multimodal approach combining both, using CLIP-style contrastive learning to align images, barcode DNA, and text-based representations of taxonomic labels in a unified embedding space. This allows for accurate classification of both known and unknown insect species without task-specific fine-tuning, leveraging contrastive learning for the first time to fuse DNA and image data. Our method surpasses previous single-modality approaches in accuracy by over 8% on zero-shot learning tasks, showcasing its effectiveness in biodiversity studies.
   Submitted 6 November, 2024; v1 submitted 27 May, 2024; originally announced May 2024.
   Comments: 25 pages with 11 figures
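Schematically, CLIP-style alignment across three modalities applies a symmetric contrastive loss between each pair of encoders; the stand-in embeddings and the sum over modality pairs below are assumptions about the usual setup, not CLIBD's exact objective.

```python
import torch
import torch.nn.functional as F

def clip_loss(a, b, temperature=0.07):
    """Symmetric InfoNCE between two batches of paired embeddings."""
    a, b = F.normalize(a, dim=1), F.normalize(b, dim=1)
    logits = a @ b.T / temperature
    target = torch.arange(a.size(0))
    return (F.cross_entropy(logits, target) +
            F.cross_entropy(logits.T, target)) / 2

img = torch.randn(32, 256)   # image-encoder embeddings (stand-in)
dna = torch.randn(32, 256)   # DNA-barcode-encoder embeddings (stand-in)
txt = torch.randn(32, 256)   # taxonomic-label text embeddings (stand-in)
loss = clip_loss(img, dna) + clip_loss(img, txt) + clip_loss(dna, txt)
```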
8. arXiv:2404.01282 [pdf, other] cs.CV
   LoSA: Long-Short-range Adapter for Scaling End-to-End Temporal Action Localization
   Authors: Akshita Gupta, Gaurav Mittal, Ahmed Magooda, Ye Yu, Graham W. Taylor, Mei Chen
   Abstract: Temporal Action Localization (TAL) involves localizing and classifying action snippets in an untrimmed video. The emergence of large video foundation models has led RGB-only video backbones to outperform previous methods needing both RGB and optical flow modalities. Leveraging these large models is often limited to training only the TAL head, due to the prohibitively large GPU memory required to adapt the video backbone for TAL. To overcome this limitation, we introduce LoSA, the first memory-and-parameter-efficient backbone adapter designed specifically for TAL to handle untrimmed videos. LoSA specializes for TAL by introducing Long-Short-range Adapters that adapt the intermediate layers of the video backbone over different temporal ranges. These adapters run parallel to the video backbone to significantly reduce memory footprint. LoSA also includes Long-Short-range Gated Fusion that strategically combines the output of these adapters from the video backbone layers to enhance the video features provided to the TAL head. Experiments show that LoSA significantly outperforms all existing methods on standard TAL benchmarks, THUMOS-14 and ActivityNet-v1.3, by scaling end-to-end backbone adaptation to billion-parameter-plus models like VideoMAEv2 (ViT-g) and leveraging them beyond head-only transfer learning.
   Submitted 6 August, 2024; v1 submitted 1 April, 2024; originally announced April 2024.
   Comments: Under submission
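The general adapter pattern the abstract relies on can be sketched as a gated bottleneck module reading intermediate features of a frozen backbone in parallel; LoSA's specific long/short-range temporal design is not reproduced, and all sizes here are illustrative.

```python
import torch

class BottleneckAdapter(torch.nn.Module):
    """Lightweight adapter applied to frozen backbone features."""
    def __init__(self, dim=768, hidden=64):
        super().__init__()
        self.down = torch.nn.Linear(dim, hidden)
        self.up = torch.nn.Linear(hidden, dim)
        self.gate = torch.nn.Parameter(torch.zeros(1))  # starts inactive

    def forward(self, feats):                 # feats: (B, T, dim)
        return self.gate * self.up(torch.relu(self.down(feats)))

# Stand-ins for features tapped from two intermediate backbone layers:
layer_feats = [torch.randn(2, 64, 768) for _ in range(2)]
adapters = torch.nn.ModuleList(BottleneckAdapter() for _ in layer_feats)
fused = sum(a(f) for a, f in zip(adapters, layer_feats))
print(fused.shape)                            # torch.Size([2, 64, 768])
```

Because gradients flow only through the small adapters and gates, the frozen backbone never needs optimizer state, which is the memory saving such designs target.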
9. arXiv:2401.15268 cs.LG, cs.AI
   Towards Stable Preferences for Stakeholder-aligned Machine Learning
   Authors: Haleema Sheraz, Stefan C. Kremer, Joshua August Skorburg, Graham Taylor, Walter Sinnott-Armstrong, Kyle Boerstler
   Abstract: In response to the pressing challenge of kidney allocation, characterized by growing demand for organs, this research sets out to develop a data-driven solution that also incorporates stakeholder values. The primary objective of this study is to create a method for learning both individual and group-level preferences pertaining to kidney allocations. Drawing upon data from the 'Pairwise Kidney Patient Online Survey' and leveraging two distinct datasets, we evaluate across three levels - Individual, Group, and Stability - employing machine learning classifiers assessed through several metrics. The Individual-level model predicts individual participant preferences, the Group-level model aggregates preferences across participants, and the Stability-level model, an extension of the Group level, evaluates the stability of these preferences over time. By incorporating stakeholder preferences into the kidney allocation process, we aspire to advance the ethical dimensions of organ transplantation, contributing to more transparent and equitable practices while promoting the integration of moral values into algorithmic decision-making.
   Submitted 2 February, 2024; v1 submitted 26 January, 2024; originally announced January 2024.
   Comments: Work in Progress
</p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Work in Progress</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2312.07833">arXiv:2312.07833</a> <span> [<a href="https://arxiv.org/pdf/2312.07833">pdf</a>, <a href="https://arxiv.org/format/2312.07833">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> Stable Rivers: A Case Study in the Application of Text-to-Image Generative Models for Earth Sciences </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Kupferschmidt%2C+C">C Kupferschmidt</a>, <a href="/search/cs?searchtype=author&query=Binns%2C+A+D">A. D. Binns</a>, <a href="/search/cs?searchtype=author&query=Kupferschmidt%2C+K+L">K. L. Kupferschmidt</a>, <a href="/search/cs?searchtype=author&query=Taylor%2C+G+W">G. W Taylor</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2312.07833v1-abstract-short" style="display: inline;"> Text-to-image (TTI) generative models can be used to generate photorealistic images from a given text-string input. These models offer great potential to mitigate challenges to the uptake of machine learning in the earth sciences. However, the rapid increase in their use has raised questions about fairness and biases, with most research to-date focusing on social and cultural areas rather than dom… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2312.07833v1-abstract-full').style.display = 'inline'; document.getElementById('2312.07833v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2312.07833v1-abstract-full" style="display: none;"> Text-to-image (TTI) generative models can be used to generate photorealistic images from a given text-string input. These models offer great potential to mitigate challenges to the uptake of machine learning in the earth sciences. However, the rapid increase in their use has raised questions about fairness and biases, with most research to-date focusing on social and cultural areas rather than domain-specific considerations. We conducted a case study for the earth sciences, focusing on the field of fluvial geomorphology, where we evaluated subject-area specific biases in the training data and downstream model performance of Stable Diffusion (v1.5). In addition to perpetuating Western biases, we found that the training data over-represented scenic locations, such as famous rivers and waterfalls, and showed serious under- and over-representation of many morphological and environmental terms. Despite biased training data, we found that with careful prompting, the Stable Diffusion model was able to generate photorealistic synthetic river images reproducing many important environmental and morphological characteristics. Furthermore, conditional control techniques, such as the use of condition maps with ControlNet were effective for providing additional constraints on output images. 
   Despite the great potential of TTI models in the earth sciences, we advocate for caution in sensitive applications, and for domain-specific reviews of training data and image-generation biases, to mitigate the perpetuation of existing biases.
   Submitted 12 December, 2023; originally announced December 2023.
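The conditional-control workflow mentioned in the abstract looks roughly like the following with the open-source diffusers library; the checkpoints named are public Hugging Face models, and the prompt and precomputed edge map are placeholders, not the paper's materials.

```python
import torch
from PIL import Image
from diffusers import ControlNetModel, StableDiffusionControlNetPipeline

controlnet = ControlNetModel.from_pretrained(
    "lllyasviel/sd-controlnet-canny", torch_dtype=torch.float16)
pipe = StableDiffusionControlNetPipeline.from_pretrained(
    "runwayml/stable-diffusion-v1-5", controlnet=controlnet,
    torch_dtype=torch.float16).to("cuda")

edge_map = Image.open("river_edges.png")      # placeholder condition map
image = pipe("a photorealistic gravel-bed river with riffles and bars",
             image=edge_map, num_inference_steps=30).images[0]
image.save("synthetic_river.png")
```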
11. arXiv:2311.02401 [pdf, other] cs.LG
   BarcodeBERT: Transformers for Biodiversity Analysis
   Authors: Pablo Millan Arias, Niousha Sadjadi, Monireh Safari, ZeMing Gong, Austin T. Wang, Scott C. Lowe, Joakim Bruslund Haurum, Iuliia Zarubiieva, Dirk Steinke, Lila Kari, Angel X. Chang, Graham W. Taylor
   Abstract: Understanding biodiversity is a global challenge, in which DNA barcodes - short snippets of DNA that cluster by species - play a pivotal role. In particular, invertebrates, a highly diverse and under-explored group, pose unique taxonomic complexities. We explore machine learning approaches, comparing supervised CNNs, fine-tuned foundation models, and a DNA barcode-specific masking strategy across datasets of varying complexity. While simpler datasets and tasks favor supervised CNNs or fine-tuned transformers, challenging species-level identification demands a paradigm shift towards self-supervised pretraining. We propose BarcodeBERT, the first self-supervised method for general biodiversity analysis, leveraging a 1.5M invertebrate DNA barcode reference library. This work highlights how dataset specifics and coverage impact model selection, and underscores the role of self-supervised pretraining in achieving high-accuracy DNA barcode-based identification at the species and genus level. Indeed, without the fine-tuning step, BarcodeBERT pretrained on a large DNA barcode dataset outperforms DNABERT and DNABERT-2 on multiple downstream classification tasks. The code repository is available at https://github.com/Kari-Genomics-Lab/BarcodeBERT
   Submitted 4 November, 2023; originally announced November 2023.
   Comments: Main text: 5 pages; total: 9 pages, 2 figures. Accepted at the 4th Workshop on Self-Supervised Learning: Theory and Practice (NeurIPS 2023)
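The DNA-barcode-specific masking strategy can be illustrated at the data level: split a barcode into non-overlapping k-mers and hide a fraction for the model to reconstruct. The k, mask rate, and tokenization below are assumptions; the paper's exact scheme may differ.

```python
import random

def kmer_tokens(seq, k=4):
    """Non-overlapping k-mer tokenization of a DNA barcode string."""
    return [seq[i:i + k] for i in range(0, len(seq) - k + 1, k)]

def mask_tokens(tokens, mask_rate=0.15, mask_token="[MASK]"):
    masked, targets = [], []
    for tok in tokens:
        hide = random.random() < mask_rate
        masked.append(mask_token if hide else tok)
        targets.append(tok if hide else None)   # model reconstructs these
    return masked, targets

barcode = "AACATTATATTTTATTTTCGGAATATGAGCAGGA"    # COI-style fragment
print(mask_tokens(kmer_tokens(barcode)))
```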
12. arXiv:2311.00096 [pdf, other] cs.LG, cs.AI
   Bandit-Driven Batch Selection for Robust Learning under Label Noise
   Authors: Michal Lisicki, Mihai Nica, Graham W. Taylor
   Abstract: We introduce a novel approach for batch selection in Stochastic Gradient Descent (SGD) training, leveraging combinatorial bandit algorithms. Our methodology focuses on optimizing the learning process in the presence of label noise, a prevalent issue in real-world datasets. Experimental evaluations on the CIFAR-10 dataset reveal that our approach consistently outperforms existing methods across various levels of label corruption. Importantly, we achieve this superior performance without incurring the computational overhead commonly associated with auxiliary neural network models. This work presents a balanced trade-off between computational efficiency and model efficacy, offering a scalable solution for complex machine learning applications.
   Submitted 31 October, 2023; originally announced November 2023.
   Comments: WANT@NeurIPS 2023 & OPT@NeurIPS 2023
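As a toy illustration of the general idea (not the paper's algorithm): treat each training example as a bandit arm, sample batches under exponential weights, and update the weights with a reward such as the drop in that example's loss. The reward here is a random placeholder.

```python
import numpy as np

rng = np.random.default_rng(0)
n_examples, batch_size, eta = 1000, 32, 0.05
log_w = np.zeros(n_examples)                  # log-weights per example

for step in range(100):
    p = np.exp(log_w - log_w.max())
    p /= p.sum()
    batch = rng.choice(n_examples, size=batch_size, replace=False, p=p)
    rewards = rng.normal(size=batch_size)     # placeholder: loss decrease
    log_w[batch] += eta * rewards             # EXP3-style weight update
```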
arXiv:2310.04786 (https://arxiv.org/abs/2310.04786) [q-fin.RM, cs.CR]
On the evolution of data breach reporting patterns and frequency in the United States: a cross-state analysis
Authors: Benjamin Avanzi, Xingyun Tan, Greg Taylor, Bernard Wong
Abstract: Understanding the emergence of data breaches is crucial for cyber insurance. However, analyses of data breach frequency trends in the current literature lead to contradictory conclusions. We put forward that those discrepancies may be (at least partially) due to inconsistent data collection standards, as well as reporting patterns, over time and space. We set out to carefully control both. In this paper, we conduct a joint analysis of state Attorneys General's publications on data breaches across eight states (namely, California, Delaware, Indiana, Maine, Montana, North Dakota, Oregon, and Washington), all of which are subject to established data collection standards, namely state (mandatory) data breach notification laws. Thanks to our explicit recognition of these notification laws, we can model the frequency of breaches in a consistent and comparable way over time. Hence, we are able to isolate and capture the complexities of reporting patterns, adequately estimate IBNRs (breaches incurred but not yet reported), and yield a highly reliable assessment of historical frequency trends in data breaches. Our analysis also provides a comprehensive comparison of data breach frequency across the eight U.S. states, extending knowledge of state-specific differences in cyber risk, which have not been extensively discussed in the current literature. Furthermore, we uncover novel features not previously discussed in the literature, such as differences in cyber risk frequency trends between large and small data breaches. Overall, we find that reporting delays are lengthening. We also elicit commonalities and heterogeneities in reporting patterns across states, severity levels, and time periods. After adequately estimating IBNRs, we find that frequency is relatively stable before 2020 and increasing after 2020, consistently across states. Implications of our findings for cyber insurance are discussed.
Submitted: 30 June, 2024 (v2); v1 submitted 7 October, 2023; originally announced October 2023.
MSC Class: 91G70; 62P05; 91B30 (Primary)
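The IBNR estimation this abstract relies on can be illustrated with a textbook chain-ladder calculation on a toy run-off triangle of reported breach counts; the paper's frequency model is substantially richer (state- and severity-specific), so this is only the skeleton of the idea.

```python
import numpy as np

# Toy cumulative run-off triangle: rows = occurrence year, columns =
# development (reporting-delay) year; np.nan marks cells not yet observable.
C = np.array([
    [60.,  85.,  95., 100.],
    [70., 100., 112., np.nan],
    [80., 115., np.nan, np.nan],
    [90., np.nan, np.nan, np.nan],
])

n = C.shape[1]
# Chain-ladder development factors from rows where both columns are observed
f = np.empty(n - 1)
for j in range(n - 1):
    obs = ~np.isnan(C[:, j + 1])
    f[j] = C[obs, j + 1].sum() / C[obs, j].sum()

# Project unobserved cells; IBNR = projected ultimate - latest reported count
latest = np.array([row[~np.isnan(row)][-1] for row in C])
P = C.copy()
for j in range(n - 1):
    miss = np.isnan(P[:, j + 1])
    P[miss, j + 1] = P[miss, j] * f[j]

print("development factors:", np.round(f, 3))
print("IBNR per occurrence year:", np.round(P[:, -1] - latest, 1))
```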
arXiv:2308.04657 (https://arxiv.org/abs/2308.04657) [cs.CV]
Which Tokens to Use? Investigating Token Reduction in Vision Transformers
Authors: Joakim Bruslund Haurum, Sergio Escalera, Graham W. Taylor, Thomas B. Moeslund
Abstract: Since the introduction of the Vision Transformer (ViT), researchers have sought to make ViTs more efficient by removing redundant information in the processed tokens. While different methods have been explored to achieve this goal, we still lack understanding of the resulting reduction patterns and how those patterns differ across token reduction methods and datasets. To close this gap, we set out to understand the reduction patterns of 10 different token reduction methods using four image classification datasets. By systematically comparing these methods on the different classification tasks, we find that the Top-K pruning method is a surprisingly strong baseline. Through in-depth analysis of the different methods, we determine that: the reduction patterns are generally not consistent when varying the capacity of the backbone model; the reduction patterns of pruning-based methods significantly differ from fixed radial patterns; and the reduction patterns of pruning-based methods are correlated across classification datasets. Finally, we report that the similarity of reduction patterns is a moderate-to-strong proxy for model performance. Project page at https://vap.aau.dk/tokens.
Submitted: 8 August, 2023; originally announced August 2023.
Comments: ICCV 2023 NIVT Workshop. Project webpage: https://vap.aau.dk/tokens
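The Top-K pruning baseline the study highlights is simple enough to state directly. A minimal PyTorch sketch, assuming tokens are scored by the attention the CLS token pays them (one common scoring choice, not necessarily the paper's exact setup):

```python
import torch

def topk_prune(tokens: torch.Tensor, cls_attn: torch.Tensor, keep: int):
    """Top-K pruning: keep only the `keep` highest-scoring patch tokens.
    tokens:   (B, N, D) patch tokens (CLS excluded)
    cls_attn: (B, N) attention weight the CLS token assigns to each patch"""
    idx = cls_attn.topk(keep, dim=1).indices                  # (B, keep)
    idx = idx.unsqueeze(-1).expand(-1, -1, tokens.shape[-1])  # (B, keep, D)
    return tokens.gather(1, idx)

B, N, D = 2, 196, 384
tokens = torch.randn(B, N, D)
cls_attn = torch.rand(B, N).softmax(dim=1)
pruned = topk_prune(tokens, cls_attn, keep=98)  # drop half the tokens
print(pruned.shape)                             # torch.Size([2, 98, 384])
```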
arXiv:2307.10455 (https://arxiv.org/abs/2307.10455) [cs.CV, cs.AI, cs.LG]
A Step Towards Worldwide Biodiversity Assessment: The BIOSCAN-1M Insect Dataset
Authors: Zahra Gharaee, ZeMing Gong, Nicholas Pellegrino, Iuliia Zarubiieva, Joakim Bruslund Haurum, Scott C. Lowe, Jaclyn T. A. McKeown, Chris C. Y. Ho, Joschka McLeod, Yi-Yun C Wei, Jireh Agda, Sujeevan Ratnasingham, Dirk Steinke, Angel X. Chang, Graham W. Taylor, Paul Fieguth
Abstract: In an effort to catalog insect biodiversity, we propose a new large dataset of hand-labelled insect images, the BIOSCAN-Insect Dataset. Each record is taxonomically classified by an expert, and also has associated genetic information, including raw nucleotide barcode sequences and assigned barcode index numbers, which are genetically based proxies for species classification. This paper presents a curated million-image dataset, primarily intended to train computer-vision models capable of providing image-based taxonomic assessment; however, the dataset also presents compelling characteristics whose study would be of interest to the broader machine learning community. Owing to the biological nature of the dataset, it exhibits a characteristic long-tailed class-imbalance distribution. Furthermore, taxonomic labelling is a hierarchical classification scheme, presenting a highly fine-grained classification problem at lower levels. Beyond spurring interest in biodiversity research within the machine learning community, progress on creating an image-based taxonomic classifier will also further the ultimate goal of all BIOSCAN research: to lay the foundation for a comprehensive survey of global biodiversity. This paper introduces the dataset and explores the classification task through the implementation and analysis of a baseline classifier.
Submitted: 13 November, 2023 (v3); v1 submitted 19 July, 2023; originally announced July 2023.
arXiv:2307.01088 (https://arxiv.org/abs/2307.01088) [cs.LG, cs.CV, stat.ML]
Empirically Validating Conformal Prediction on Modern Vision Architectures Under Distribution Shift and Long-tailed Data
Authors: Kevin Kasa, Graham W. Taylor
Abstract: Conformal prediction has emerged as a rigorous means of providing deep learning models with reliable uncertainty estimates and safety guarantees. Yet, its performance is known to degrade under distribution shift and long-tailed class distributions, which are often present in real-world applications. Here, we characterize the performance of several post-hoc and training-based conformal prediction methods under these settings, providing the first empirical evaluation on large-scale datasets and models. We show that across numerous conformal methods and neural network families, performance greatly degrades under distribution shifts, violating the safety guarantees. Similarly, we show that in long-tailed settings the guarantees are frequently violated on many classes. Understanding the limitations of these methods is necessary for deployment in real-world and safety-critical applications.
Submitted: 3 July, 2023; originally announced July 2023.
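For context on what is being stress-tested here: a minimal split conformal predictor, whose (1 - alpha) coverage guarantee assumes exchangeable calibration and test data; that exchangeability is exactly what distribution shift breaks. The score function below is one standard choice, not necessarily the paper's.

```python
import numpy as np

def split_conformal_sets(cal_probs, cal_labels, test_probs, alpha=0.1):
    """Split conformal prediction with score s = 1 - p_model(true class).
    Returns a boolean (n_test, n_classes) matrix of prediction-set membership."""
    n = len(cal_labels)
    scores = 1.0 - cal_probs[np.arange(n), cal_labels]
    # finite-sample-corrected quantile of the calibration scores
    q = np.quantile(scores, np.ceil((n + 1) * (1 - alpha)) / n, method="higher")
    return test_probs >= 1.0 - q

rng = np.random.default_rng(0)
cal_probs = rng.dirichlet(np.ones(10), size=500)   # stand-in model outputs
cal_labels = rng.integers(0, 10, size=500)
test_probs = rng.dirichlet(np.ones(10), size=5)
print(split_conformal_sets(cal_probs, cal_labels, test_probs, alpha=0.1))
```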
arXiv:2303.13755 (https://arxiv.org/abs/2303.13755) [cs.CV, cs.AI, cs.LG]
Sparsifiner: Learning Sparse Instance-Dependent Attention for Efficient Vision Transformers
Authors: Cong Wei, Brendan Duke, Ruowei Jiang, Parham Aarabi, Graham W. Taylor, Florian Shkurti
Abstract: Vision Transformers (ViT) have shown competitive performance advantages over convolutional neural networks (CNNs), though they often come with high computational costs. To this end, previous methods explore different attention patterns by limiting a fixed number of spatially nearby tokens to accelerate the ViT's multi-head self-attention (MHSA) operations. However, such structured attention patterns limit token-to-token connections to their spatial relevance, which disregards learned semantic connections from a full attention mask. In this work, we propose a novel approach to learn instance-dependent attention patterns, by devising a lightweight connectivity predictor module to estimate the connectivity score of each pair of tokens. Intuitively, two tokens have high connectivity scores if their features are considered relevant either spatially or semantically. As each token only attends to a small number of other tokens, the binarized connectivity masks are often very sparse by nature and therefore provide the opportunity to accelerate the network via sparse computations. Equipped with the learned unstructured attention pattern, the sparse attention ViT (Sparsifiner) produces a superior Pareto-optimal trade-off between FLOPs and top-1 accuracy on ImageNet compared to token sparsity. Our method reduces 48% to 69% of MHSA FLOPs while the accuracy drop is within 0.4%. We also show that combining attention and token sparsity reduces ViT FLOPs by over 60%.
Submitted: 23 March, 2023; originally announced March 2023.
Comments: Accepted at CVPR 2023.
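A toy sketch of attention restricted by an instance-dependent connectivity mask: random logits stand in for the paper's lightweight connectivity predictor, and per-query top-k binarization is one plausible reading of the described masking, not the released implementation.

```python
import torch
import torch.nn.functional as F

def masked_attention(q, k, v, conn_logits, keep: int):
    """Self-attention restricted to a sparse, instance-dependent mask.
    conn_logits: (B, N, N) connectivity scores (here random stand-ins);
    each query attends only to its `keep` highest-scoring keys."""
    B, N, D = q.shape
    mask = torch.full_like(conn_logits, float("-inf"))
    topk = conn_logits.topk(keep, dim=-1).indices
    mask.scatter_(-1, topk, 0.0)   # 0 = connection kept, -inf = pruned
    attn = F.softmax(q @ k.transpose(-2, -1) / D**0.5 + mask, dim=-1)
    return attn @ v

B, N, D = 2, 64, 32
q, k, v = (torch.randn(B, N, D) for _ in range(3))
out = masked_attention(q, k, v, torch.randn(B, N, N), keep=8)
print(out.shape)  # torch.Size([2, 64, 32])
```

In a real model the -inf mask would be replaced by genuinely sparse kernels; the dense mask here only demonstrates the connectivity semantics.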
arXiv:2302.05132 (https://arxiv.org/abs/2302.05132) [cs.CV]
GCNet: Probing Self-Similarity Learning for Generalized Counting Network
Authors: Mingjie Wang, Yande Li, Jun Zhou, Graham W. Taylor, Minglun Gong
Abstract: The class-agnostic counting (CAC) problem has caught increasing attention recently due to its wide societal applications and arduous challenges. To count objects of different categories, existing approaches rely on user-provided exemplars, which are hard to obtain and limit their generality. In this paper, we aim to empower the framework to recognize adaptive exemplars within whole images. A zero-shot Generalized Counting Network (GCNet) is developed, which uses a pseudo-Siamese structure to automatically and effectively learn pseudo exemplar clues from inherent repetition patterns. In addition, a weakly-supervised scheme is presented to reduce the burden of the laborious density maps required by all contemporary CAC models, allowing GCNet to be trained using count-level supervisory signals in an end-to-end manner. Without being given any spatial location hints, GCNet adaptively captures them through a carefully designed self-similarity learning strategy. Extensive experiments and ablation studies on the prevailing benchmark FSC147 for zero-shot CAC demonstrate the superiority of our GCNet. It performs on par with existing exemplar-dependent methods and shows stunning cross-dataset generality on crowd-specific datasets, e.g., ShanghaiTech Part A, Part B, and UCF_QNRF.
Submitted: 10 February, 2023; originally announced February 2023.
arXiv:2301.12710 (https://arxiv.org/abs/2301.12710) [stat.ML, cs.LG, econ.EM, q-fin.RM] doi:10.1017/asb.2024.7
Machine Learning with High-Cardinality Categorical Features in Actuarial Applications
Authors: Benjamin Avanzi, Greg Taylor, Melantha Wang, Bernard Wong
Abstract: High-cardinality categorical features are pervasive in actuarial data (e.g., occupation in commercial property insurance). Standard categorical encoding methods like one-hot encoding are inadequate in these settings. In this work, we present a novel Generalised Linear Mixed Model Neural Network ("GLMMNet") approach to the modelling of high-cardinality categorical features. The GLMMNet integrates a generalised linear mixed model in a deep learning framework, offering the predictive power of neural networks and the transparency of random effects estimates, the latter of which cannot be obtained from entity embedding models. Further, its flexibility to deal with any distribution in the exponential dispersion (ED) family makes it widely applicable to many actuarial contexts and beyond. We illustrate and compare the GLMMNet against existing approaches in a range of simulation experiments as well as in a real-life insurance case study. Notably, we find that the GLMMNet often outperforms, or at least performs comparably with, an entity-embedded neural network, while providing the additional benefit of transparency, which is particularly valuable in practical applications. Importantly, while our model was motivated by actuarial applications, it can have wider applicability. The GLMMNet would suit any application that involves high-cardinality categorical variables and where the response cannot be sufficiently modelled by a Gaussian distribution.
Submitted: 30 January, 2023; originally announced January 2023.
MSC Class: 91G70; 91G60; 62P05
Journal ref: ASTIN Bulletin 54 (2024) 213-238
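A rough architectural sketch of the GLMM-in-a-network idea under a Gaussian response, not the authors' implementation: a neural net carries the fixed effects, while a per-category intercept shrunk by a Gaussian penalty plays the role of a random effect (the shrinkage is what distinguishes it from a free entity embedding).

```python
import torch
import torch.nn as nn

class GLMMNetSketch(nn.Module):
    """Illustrative only: NN fixed effects + shrunken categorical intercepts."""
    def __init__(self, n_features: int, n_categories: int):
        super().__init__()
        self.fixed = nn.Sequential(
            nn.Linear(n_features, 32), nn.ReLU(), nn.Linear(32, 1))
        self.random_intercept = nn.Embedding(n_categories, 1)
        nn.init.zeros_(self.random_intercept.weight)

    def forward(self, x, cat):
        return self.fixed(x).squeeze(-1) + self.random_intercept(cat).squeeze(-1)

    def random_effect_penalty(self, sigma2=0.1):
        # Gaussian prior on the intercepts, mimicking a random effect
        return (self.random_intercept.weight ** 2).sum() / (2 * sigma2)

model = GLMMNetSketch(n_features=5, n_categories=1000)
x = torch.randn(64, 5)
cat = torch.randint(0, 1000, (64,))
y = torch.randn(64)
loss = nn.functional.mse_loss(model(x, cat), y) + model.random_effect_penalty()
loss.backward()
print(float(loss))
```

The fitted intercepts can be read off directly, which is the transparency benefit the abstract contrasts with opaque entity embeddings.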
arXiv:2301.10351 (https://arxiv.org/abs/2301.10351) [cs.CV, q-bio.QM]
Few-Shot Learning Enables Population-Scale Analysis of Leaf Traits in Populus trichocarpa
Authors: John Lagergren, Mirko Pavicic, Hari B. Chhetri, Larry M. York, P. Doug Hyatt, David Kainer, Erica M. Rutter, Kevin Flores, Jack Bailey-Bale, Marie Klein, Gail Taylor, Daniel Jacobson, Jared Streich
Abstract: Plant phenotyping is typically a time-consuming and expensive endeavor, requiring large groups of researchers to meticulously measure biologically relevant plant traits, and is the main bottleneck in understanding plant adaptation and the genetic architecture underlying complex traits at population scale. In this work, we address these challenges by leveraging few-shot learning with convolutional neural networks (CNNs) to segment the leaf body and visible venation of 2,906 P. trichocarpa leaf images obtained in the field. In contrast to previous methods, our approach (i) does not require experimental or image pre-processing, (ii) uses the raw RGB images at full resolution, and (iii) requires very few samples for training (e.g., just eight images for vein segmentation). Traits relating to leaf morphology and vein topology are extracted from the resulting segmentations using traditional open-source image-processing tools, validated using real-world physical measurements, and used to conduct a genome-wide association study to identify genes controlling the traits. In this way, the current work is designed to provide the plant phenotyping community with (i) methods for fast and accurate image-based feature extraction that require minimal training data, and (ii) a new population-scale dataset, including 68 different leaf phenotypes, for domain scientists and machine learning researchers. All of the few-shot learning code, data, and results are made publicly available.
Submitted: 18 May, 2023 (v3); v1 submitted 24 January, 2023; originally announced January 2023.
arXiv:2301.08292 (https://arxiv.org/abs/2301.08292) [quant-ph, cs.LG]
Quantum HyperNetworks: Training Binary Neural Networks in Quantum Superposition
Authors: Juan Carrasquilla, Mohamed Hibat-Allah, Estelle Inack, Alireza Makhzani, Kirill Neklyudov, Graham W. Taylor, Giacomo Torlai
Abstract: Binary neural networks, i.e., neural networks whose parameters and activations are constrained to only two possible values, offer a compelling avenue for the deployment of deep learning models on energy- and memory-limited devices. However, their training, architectural design, and hyperparameter tuning remain challenging, as these involve multiple computationally expensive combinatorial optimization problems. Here we introduce quantum hypernetworks as a mechanism to train binary neural networks on quantum computers, unifying the search over parameters, hyperparameters, and architectures in a single optimization loop. Through classical simulations, we demonstrate that our approach effectively finds optimal parameters, hyperparameters and architectural choices with high probability on classification problems, including a two-dimensional Gaussian dataset and a scaled-down version of the MNIST handwritten digits. We represent our quantum hypernetworks as variational quantum circuits, and find that an optimal circuit depth maximizes the probability of finding performant binary neural networks. Our unified approach provides an immense scope for other applications in the field of machine learning.
Submitted: 19 January, 2023; originally announced January 2023.
Comments: 10 pages, 6 figures. Minimal implementation: https://github.com/carrasqu/binncode
arXiv:2210.01099 (https://arxiv.org/abs/2210.01099) [stat.ME, cs.LG]
Model error and its estimation, with particular application to loss reserving
Authors: G Taylor, G McGuire
Abstract: This paper is concerned with forecast error, particularly in relation to loss reserving. This is generally regarded as consisting of three components, namely parameter, process and model errors. The first two of these components, and their estimation, are well understood, but model error less so. Model error itself is considered in two parts: one part that is capable of estimation from past data (internal model error), and another part that is not (external model error). Attention is focused here on internal model error. Estimation of this error component is approached by means of Bayesian model averaging, using the Bayesian interpretation of the LASSO. This is used to generate a set of admissible models, each with its prior probability and the likelihood of observed data. A posterior on the model set, conditional on the data, results, and an estimate of model error (contained in a loss reserve) is obtained as the variance of the loss reserve according to this posterior. The population of models entering materially into the support of the posterior may turn out to be thinner than desired, and bootstrapping of the LASSO is used to add bulk. This provides the bonus of an estimate of parameter error as well. It turns out that the estimates of parameter and model errors are entangled, and dissociating them is difficult at best, and possibly not even meaningful. These matters are discussed. The majority of the discussion applies to forecasting generally, but numerical illustration of the concepts is given in relation to insurance data and the problem of insurance loss reserving.
Submitted: 29 September, 2022; originally announced October 2022.
MSC Class: 62P05. ACM Class: G.3; I.6
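The bootstrap-the-LASSO device can be sketched in a few lines; the synthetic regression below stands in for a loss-reserving triangle, and the spread of bootstrap forecasts stands in for the entangled parameter-plus-internal-model error the abstract describes (each resampled fit may select a different variable subset, which is the model-averaging flavour).

```python
import numpy as np
from sklearn.linear_model import Lasso

rng = np.random.default_rng(1)
X = rng.normal(size=(200, 10))
y = 2.0 * X[:, 0] - 1.0 * X[:, 3] + rng.normal(scale=0.5, size=200)
X_new = rng.normal(size=(50, 10))   # stand-in for future cells of a triangle

forecasts = []
for _ in range(200):                # bootstrap the LASSO
    idx = rng.integers(0, len(y), len(y))
    model = Lasso(alpha=0.05).fit(X[idx], y[idx])
    forecasts.append(model.predict(X_new).sum())   # a 'loss reserve' analogue

forecasts = np.array(forecasts)
print(f"central estimate: {forecasts.mean():.1f}")
print(f"bootstrap s.d. (parameter + internal model error proxy): "
      f"{forecasts.std():.1f}")
```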
arXiv:2207.09408 (https://arxiv.org/abs/2207.09408) [cs.LG, cs.AI]
Bounding generalization error with input compression: An empirical study with infinite-width networks
Authors: Angus Galloway, Anna Golubeva, Mahmoud Salem, Mihai Nica, Yani Ioannou, Graham W. Taylor
Abstract: Estimating the Generalization Error (GE) of Deep Neural Networks (DNNs) is an important task that often relies on the availability of held-out data. The ability to better predict GE based on a single training set may yield overarching DNN design principles that reduce reliance on trial-and-error, along with other performance assessment advantages. In search of a quantity relevant to GE, we investigate the Mutual Information (MI) between the input and final layer representations, using the infinite-width DNN limit to bound MI. An existing input compression-based GE bound is used to link MI and GE. To the best of our knowledge, this represents the first empirical study of this bound. In our attempt to empirically falsify the theoretical bound, we find that it is often tight for best-performing models. Furthermore, it detects randomization of training labels in many cases, reflects test-time perturbation robustness, and works well given only few training samples. These results are promising given that input compression is broadly applicable where MI can be estimated with confidence.
Submitted: 19 July, 2022; originally announced July 2022.
Comments: 12 pages main content, 26 pages total.
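For intuition only: for a deterministic representation T = f(X), I(X; T) = H(T), so a crude plug-in entropy estimate over binned representations indicates how strongly a network compresses its input; input-compression bounds of roughly this flavour tie lower I(X; T) to a tighter generalization guarantee. The paper's MI estimates (via the infinite-width limit) are far more careful than this binning sketch.

```python
import numpy as np

def binned_entropy_bits(T: np.ndarray, bins: int = 8) -> float:
    """Plug-in estimate of H(T) in bits; for deterministic T = f(X) this
    equals I(X;T). Crude: axis-aligned equal-width bins per dimension."""
    edges = [np.linspace(T[:, j].min(), T[:, j].max(), bins + 1)
             for j in range(T.shape[1])]
    codes = np.stack([np.clip(np.digitize(T[:, j], edges[j][1:-1]), 0, bins - 1)
                      for j in range(T.shape[1])], axis=1)
    _, counts = np.unique(codes, axis=0, return_counts=True)
    p = counts / counts.sum()
    return float(-(p * np.log2(p)).sum())

rng = np.random.default_rng(0)
X = rng.normal(size=(2000, 2))
T_compressed = np.round(X[:, :1])   # representation that clusters inputs
T_identityish = X.copy()            # representation that keeps everything
print(binned_entropy_bits(T_compressed), "<", binned_entropy_bits(T_identityish))
```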
arXiv:2206.13034 (https://arxiv.org/abs/2206.13034) [cs.LG, cs.AI]
Monitoring Shortcut Learning using Mutual Information
Authors: Mohammed Adnan, Yani Ioannou, Chuan-Yung Tsai, Angus Galloway, H. R. Tizhoosh, Graham W. Taylor
Abstract: The failure of deep neural networks to generalize to out-of-distribution data is a well-known problem and raises concerns about the deployment of trained networks in safety-critical domains such as healthcare, finance and autonomous vehicles. We study a particular kind of distribution shift: shortcuts, or spurious correlations in the training data. Shortcut learning is often only exposed when models are evaluated on real-world data that does not contain the same spurious correlations, posing a serious dilemma for AI practitioners seeking to properly assess the effectiveness of a trained model for real-world applications. In this work, we propose to use the mutual information (MI) between the learned representation and the input as a metric to find where in training the network latches onto shortcuts. Experiments demonstrate that MI can be used as a domain-agnostic metric for monitoring shortcut learning.
Submitted: 26 June, 2022; originally announced June 2022.
Comments: Accepted at ICML 2022 Workshop on Spurious Correlations, Invariance, and Stability.
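A histogram plug-in MI estimator is enough to convey the monitoring idea: computed on a fixed probe set every epoch, an early collapse of I(input; representation) toward a few bits is the proposed signature of latching onto a shortcut. The 1-D projections and estimator below are simplifications for illustration, not the paper's protocol.

```python
import numpy as np

def mi_proxy(x1d: np.ndarray, t1d: np.ndarray, bins: int = 16) -> float:
    """Histogram plug-in estimate of I(X;T) for 1-D projections (nats)."""
    pxy, _, _ = np.histogram2d(x1d, t1d, bins=bins)
    pxy /= pxy.sum()
    px = pxy.sum(axis=1, keepdims=True)
    py = pxy.sum(axis=0, keepdims=True)
    nz = pxy > 0
    return float((pxy[nz] * np.log(pxy[nz] / (px @ py)[nz])).sum())

# Monitoring idea: a shortcut-reliant representation retains little input
# information compared with one encoding the full signal.
rng = np.random.default_rng(0)
x = rng.normal(size=5000)
t_shortcut = np.sign(x)                     # keeps ~1 bit of the input
t_rich = x + 0.1 * rng.normal(size=5000)    # keeps most of the input
print(f"shortcut-like MI: {mi_proxy(x, t_shortcut):.2f} nats")
print(f"rich MI:          {mi_proxy(x, t_rich):.2f} nats")
```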
arXiv:2205.02879 (https://arxiv.org/abs/2205.02879) [cond-mat.mtrl-sci, cs.LG, physics.chem-ph]
Exploiting Ligand Additivity for Transferable Machine Learning of Multireference Character Across Known Transition Metal Complex Ligands
Authors: Chenru Duan, Adriana J. Ladera, Julian C.-L. Liu, Michael G. Taylor, Isuru R. Ariyarathna, Heather J. Kulik
Abstract: Accurate virtual high-throughput screening (VHTS) of transition metal complexes (TMCs) remains challenging due to the possibility of high multi-reference (MR) character that complicates property evaluation. We compute MR diagnostics for over 5,000 ligands present in previously synthesized transition metal complexes in the Cambridge Structural Database (CSD). To accomplish this task, we introduce an iterative approach for consistent charge assignment for ligands in the CSD. Across this set, we observe that MR character correlates linearly with the inverse value of the averaged bond order over all bonds in the molecule. We then demonstrate that ligand additivity of MR character holds in TMCs, suggesting that TMC MR character can be inferred from the sum of the MR character of the ligands. Encouraged by this observation, we leverage ligand additivity and develop a ligand-derived machine learning representation to train neural networks to predict the MR character of TMCs from properties of the constituent ligands. This approach yields models with excellent performance and superior transferability to unseen ligand chemistry and compositions.
Submitted: 5 May, 2022; originally announced May 2022.
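Ligand additivity makes for a compact worked example: if a complex's target property is the sum of per-ligand contributions, then summing per-ligand feature vectors yields a complex-level representation from which even a linear model recovers the target. All data below are synthetic; the paper's descriptors, targets, and neural networks are richer than this sketch.

```python
import numpy as np
from sklearn.linear_model import Ridge

rng = np.random.default_rng(0)
n_ligands, d = 50, 8
ligand_feats = rng.normal(size=(n_ligands, d))  # toy per-ligand descriptors
w_true = rng.normal(size=d)
ligand_mr = ligand_feats @ w_true               # toy per-ligand MR character

# Additivity: a complex = sum of its ligands' features; target = sum of
# the ligands' MR contributions.
complexes = [rng.choice(n_ligands, size=6, replace=True) for _ in range(300)]
X = np.array([ligand_feats[c].sum(axis=0) for c in complexes])
y = np.array([ligand_mr[c].sum() for c in complexes])

model = Ridge(alpha=1e-3).fit(X[:200], y[:200])
print("held-out R^2:", round(model.score(X[200:], y[200:]), 4))  # ~1 by design
```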
arXiv:2204.13829 (https://arxiv.org/abs/2204.13829) [cs.CV, q-bio.TO]
Understanding the impact of image and input resolution on deep digital pathology patch classifiers
Authors: Eu Wern Teh, Graham W. Taylor
Abstract: We consider annotation-efficient learning in Digital Pathology (DP), where expert annotations are expensive and thus scarce. We explore the impact of image and input resolution on DP patch classification performance, using two cancer patch classification datasets, PCam and CRC, to validate the results of our study. Our experiments show that patch classification performance can be improved by manipulating both the image and input resolution in annotation-scarce and annotation-rich environments. We show a positive correlation between the image and input resolution and the patch classification accuracy on both datasets. By exploiting the image and input resolution, our final model trained on < 1% of data performs equally well compared to the model trained on 100% of data in the original image resolution on the PCam dataset.
Submitted: 28 April, 2022; originally announced April 2022.
Comments: To appear in the Conference on Computer and Robot Vision (CRV), 2022.
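The two knobs studied here, source image resolution (magnification) and network input resolution, can be decoupled in a few lines of PyTorch; the PCam-like 96x96 patch size and the bilinear resizing are assumptions for illustration, not the paper's exact pipeline.

```python
import torch
import torch.nn.functional as F

def prepare_patch(patch: torch.Tensor, image_scale: float, input_size: int):
    """First change the 'image resolution' (rescale the source patch), then
    resize the result to the network's 'input resolution'."""
    rescaled = F.interpolate(patch, scale_factor=image_scale,
                             mode="bilinear", align_corners=False)
    return F.interpolate(rescaled, size=(input_size, input_size),
                         mode="bilinear", align_corners=False)

patch = torch.rand(1, 3, 96, 96)   # e.g. a PCam-sized patch
x = prepare_patch(patch, image_scale=2.0, input_size=224)
print(x.shape)  # torch.Size([1, 3, 224, 224])
```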
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2201.12602v1-abstract-full').style.display = 'none'; document.getElementById('2201.12602v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 29 January, 2022; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> January 2022. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Workshop on ML for Systems, 35th Conference on Neural Information Processing Systems (NeurIPS 2021)</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2201.09871">arXiv:2201.09871</a> <span> [<a href="https://arxiv.org/pdf/2201.09871">pdf</a>, <a href="https://arxiv.org/format/2201.09871">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> </div> </div> <p class="title is-5 mathjax"> On Evaluation Metrics for Graph Generative Models </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Thompson%2C+R">Rylee Thompson</a>, <a href="/search/cs?searchtype=author&query=Knyazev%2C+B">Boris Knyazev</a>, <a href="/search/cs?searchtype=author&query=Ghalebi%2C+E">Elahe Ghalebi</a>, <a href="/search/cs?searchtype=author&query=Kim%2C+J">Jungtaek Kim</a>, <a href="/search/cs?searchtype=author&query=Taylor%2C+G+W">Graham W. Taylor</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2201.09871v2-abstract-short" style="display: inline;"> In image generation, generative models can be evaluated naturally by visually inspecting model outputs. However, this is not always the case for graph generative models (GGMs), making their evaluation challenging. Currently, the standard process for evaluating GGMs suffers from three critical limitations: i) it does not produce a single score which makes model selection challenging, ii) in many ca… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2201.09871v2-abstract-full').style.display = 'inline'; document.getElementById('2201.09871v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2201.09871v2-abstract-full" style="display: none;"> In image generation, generative models can be evaluated naturally by visually inspecting model outputs. However, this is not always the case for graph generative models (GGMs), making their evaluation challenging. Currently, the standard process for evaluating GGMs suffers from three critical limitations: i) it does not produce a single score which makes model selection challenging, ii) in many cases it fails to consider underlying edge and node features, and iii) it is prohibitively slow to perform. In this work, we mitigate these issues by searching for scalar, domain-agnostic, and scalable metrics for evaluating and ranking GGMs. 
To this end, we study existing GGM metrics and neural-network-based metrics emerging from generative models of images that use embeddings extracted from a task-specific network. Motivated by the power of certain Graph Neural Networks (GNNs) to extract meaningful graph representations without any training, we introduce several metrics based on the features extracted by an untrained random GNN. We design experiments to thoroughly test metrics on their ability to measure the diversity and fidelity of generated graphs, as well as their sample and computational efficiency. Depending on the quantity of samples, we recommend one of two random-GNN-based metrics that we show to be more expressive than pre-existing metrics. While we focus on applying these metrics to GGM evaluation, in practice they make it easy to compute the dissimilarity between any two sets of graphs regardless of domain. Our code is released at: https://github.com/uoguelph-mlrg/GGM-metrics.
Submitted 27 April, 2022; v1 submitted 24 January, 2022; originally announced January 2022.
Comments: Published as a conference paper at ICLR 2022.

arXiv:2201.02627 [pdf, other] (eess.IV, cs.CV, cs.LG)
Learning with Less Labels in Digital Pathology via Scribble Supervision from Natural Images
Authors: Eu Wern Teh, Graham W. Taylor
Abstract: A critical challenge of training deep learning models in the Digital Pathology (DP) domain is the high annotation cost by medical experts. One way to tackle this issue is via transfer learning from the natural image (NI) domain, where the annotation cost is considerably cheaper. Cross-domain transfer learning from NI to DP has been shown to be successful via class labels.
One potential weakness of relying on class labels is the lack of spatial information, which can be obtained from spatial labels such as full pixel-wise segmentation labels and scribble labels. We demonstrate that scribble labels from the NI domain can boost the performance of DP models on two cancer classification datasets (Patch Camelyon Breast Cancer and Colorectal Cancer). Furthermore, we show that models trained with scribble labels yield the same performance boost as full pixel-wise segmentation labels despite being significantly easier and faster to collect.
Submitted 20 January, 2022; v1 submitted 7 January, 2022; originally announced January 2022.
</p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">To appear in IEEE International Symposium on Biomedical Imaging (ISBI) 2022</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2201.00762">arXiv:2201.00762</a> <span> [<a href="https://arxiv.org/pdf/2201.00762">pdf</a>, <a href="https://arxiv.org/format/2201.00762">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Cryptography and Security">cs.CR</span> </div> </div> <p class="title is-5 mathjax"> Execute Order 66: Targeted Data Poisoning for Reinforcement Learning </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Foley%2C+H">Harrison Foley</a>, <a href="/search/cs?searchtype=author&query=Fowl%2C+L">Liam Fowl</a>, <a href="/search/cs?searchtype=author&query=Goldstein%2C+T">Tom Goldstein</a>, <a href="/search/cs?searchtype=author&query=Taylor%2C+G">Gavin Taylor</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2201.00762v2-abstract-short" style="display: inline;"> Data poisoning for reinforcement learning has historically focused on general performance degradation, and targeted attacks have been successful via perturbations that involve control of the victim's policy and rewards. We introduce an insidious poisoning attack for reinforcement learning which causes agent misbehavior only at specific target states - all while minimally modifying a small fraction… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2201.00762v2-abstract-full').style.display = 'inline'; document.getElementById('2201.00762v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2201.00762v2-abstract-full" style="display: none;"> Data poisoning for reinforcement learning has historically focused on general performance degradation, and targeted attacks have been successful via perturbations that involve control of the victim's policy and rewards. We introduce an insidious poisoning attack for reinforcement learning which causes agent misbehavior only at specific target states - all while minimally modifying a small fraction of training observations without assuming any control over policy or reward. We accomplish this by adapting a recent technique, gradient alignment, to reinforcement learning. We test our method and demonstrate success in two Atari games of varying difficulty. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2201.00762v2-abstract-full').style.display = 'none'; document.getElementById('2201.00762v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 28 July, 2022; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 3 January, 2022; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> January 2022. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Workshop on Safe and Robust Control of Uncertain Systems at the 35th Conference on Neural Information Processing Systems</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2112.02622">arXiv:2112.02622</a> <span> [<a href="https://arxiv.org/pdf/2112.02622">pdf</a>, <a href="https://arxiv.org/format/2112.02622">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> </div> <div class="is-inline-block" style="margin-left: 0.5rem"> <div class="tags has-addons"> <span class="tag is-dark is-size-7">doi</span> <span class="tag is-light is-size-7"><a class="" href="https://doi.org/10.3390/s21238009">10.3390/s21238009 <i class="fa fa-external-link" aria-hidden="true"></i></a></span> </div> </div> </div> <p class="title is-5 mathjax"> Probabilistic Deep Learning to Quantify Uncertainty in Air Quality Forecasting </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Murad%2C+A">Abdulmajid Murad</a>, <a href="/search/cs?searchtype=author&query=Kraemer%2C+F+A">Frank Alexander Kraemer</a>, <a href="/search/cs?searchtype=author&query=Bach%2C+K">Kerstin Bach</a>, <a href="/search/cs?searchtype=author&query=Taylor%2C+G">Gavin Taylor</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2112.02622v1-abstract-short" style="display: inline;"> Data-driven forecasts of air quality have recently achieved more accurate short-term predictions. Despite their success, most of the current data-driven solutions lack proper quantifications of model uncertainty that communicate how much to trust the forecasts. Recently, several practical tools to estimate uncertainty have been developed in probabilistic deep learning. However, there have not been… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2112.02622v1-abstract-full').style.display = 'inline'; document.getElementById('2112.02622v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2112.02622v1-abstract-full" style="display: none;"> Data-driven forecasts of air quality have recently achieved more accurate short-term predictions. Despite their success, most of the current data-driven solutions lack proper quantifications of model uncertainty that communicate how much to trust the forecasts. Recently, several practical tools to estimate uncertainty have been developed in probabilistic deep learning. 
However, there have not been empirical applications and extensive comparisons of these tools in the domain of air quality forecasts. Therefore, this work applies state-of-the-art techniques of uncertainty quantification in a real-world setting of air quality forecasts. Through extensive experiments, we describe training probabilistic models and evaluate their predictive uncertainties based on empirical performance, reliability of confidence estimates, and practical applicability. We also propose improving these models using "free" adversarial training and exploiting temporal and spatial correlation inherent in air quality data. Our experiments demonstrate that the proposed models perform better than previous works in quantifying uncertainty in data-driven air quality forecasts. Overall, Bayesian neural networks provide a more reliable uncertainty estimate but can be challenging to implement and scale. Other scalable methods, such as deep ensembles, Monte Carlo (MC) dropout, and stochastic weight averaging-Gaussian (SWAG), can perform well if applied correctly, but with different tradeoffs and slight variations in performance metrics. Finally, our results show the practical impact of uncertainty estimation and demonstrate that, indeed, probabilistic models are more suitable for making informed decisions. Code and dataset are available at https://github.com/Abdulmajid-Murad/deep_probabilistic_forecast.
Submitted 5 December, 2021; originally announced December 2021.
Journal ref: Sensors, 21(23), 2021.

arXiv:2111.12170 [pdf, other] (cs.LG, cs.AI, cs.CV)
Domain-Agnostic Clustering with Self-Distillation
Authors: Mohammed Adnan, Yani A. Ioannou, Chuan-Yung Tsai, Graham W. Taylor
Abstract: Recent advancements in self-supervised learning have reduced the gap between supervised and unsupervised representation learning.
However, most self-supervised and deep clustering techniques rely heavily on data augmentation, rendering them ineffective for many learning tasks where insufficient domain knowledge exists for performing augmentation. We propose a new self-distillation-based algorithm for domain-agnostic clustering. Our method builds upon existing deep clustering frameworks and requires no separate student model. The proposed method outperforms existing domain-agnostic (augmentation-free) algorithms on CIFAR-10. We empirically demonstrate that knowledge distillation can improve unsupervised representation learning by extracting richer 'dark knowledge' from the model than using predicted labels alone. Preliminary experiments also suggest that self-distillation improves the convergence of DeepCluster-v2.
Submitted 20 December, 2021; v1 submitted 23 November, 2021; originally announced November 2021.
Comments: NeurIPS 2021 Workshop: Self-Supervised Learning - Theory and Practice.

arXiv:2111.03543 [pdf, other] (cs.LG, cs.AI, stat.ML)
Empirical analysis of representation learning and exploration in neural kernel bandits
Authors: Michal Lisicki, Arash Afkanpour, Graham W. Taylor
Abstract: Neural bandits have been shown to provide an efficient solution to practical sequential decision tasks that have nonlinear reward functions. The main contributor to that success is approximate Bayesian inference, which enables neural network (NN) training with uncertainty estimates. However, Bayesian NNs often suffer from a prohibitive computational overhead or operate on a subset of parameters. Alternatively, certain classes of infinite neural networks were shown to directly correspond to Gaussian processes (GPs) with neural kernels (NKs). NK-GPs provide accurate uncertainty estimates and can be trained faster than most Bayesian NNs. We propose to guide common bandit policies with NK distributions and show that NK bandits achieve state-of-the-art performance on nonlinear structured data. Moreover, we propose a framework for independently measuring the ability of a bandit algorithm to learn representations and to explore, and use it to analyze the impact of NK distributions with respect to those two aspects. We consider policies based on a GP and a Student's t-process (TP). Furthermore, we study practical considerations, such as training frequency and model partitioning. We believe our work will help better understand the impact of utilizing NKs in applied settings.
Submitted 9 October, 2022; v1 submitted 5 November, 2021; originally announced November 2021.
Comments: Extended version. Added a major experiment comparing NK distributions with respect to exploration and exploitation. Submitted to ICLR 2023.

arXiv:2110.15481 [pdf, other] (cs.LG, stat.ML)
Brick-by-Brick: Combinatorial Construction with Deep Reinforcement Learning
Authors: Hyunsoo Chung, Jungtaek Kim, Boris Knyazev, Jinhwi Lee, Graham W. Taylor, Jaesik Park, Minsu Cho
Abstract: Discovering a solution in a combinatorial space is prevalent in many real-world problems but is also challenging due to diverse complex constraints and the vast number of possible combinations. To address such a problem, we introduce a novel formulation, combinatorial construction, which requires a building agent to assemble unit primitives (i.e., LEGO bricks) sequentially -- every connection between two bricks must follow a fixed rule, while no bricks may mutually overlap. To construct a target object, we provide the agent with incomplete knowledge about the desired target (i.e., 2D images) instead of exact and explicit volumetric information. This problem requires a comprehensive understanding of partial information and long-term planning to append bricks sequentially, which leads us to employ reinforcement learning. The approach must also handle a variable-sized action space in which a large number of invalid actions, which would cause overlap between bricks, exist. To resolve these issues, our model, dubbed Brick-by-Brick, adopts an action validity prediction network that efficiently filters invalid actions for an actor-critic network.
We demonstrate that the proposed method successfully learns to construct an unseen object conditioned on a single image or multiple views of a target object.
Submitted 28 October, 2021; originally announced October 2021.
Comments: 21 pages, 13 figures, 7 tables. Accepted at the 35th Conference on Neural Information Processing Systems (NeurIPS 2021).

arXiv:2110.13100 [pdf, other] (cs.LG, cs.AI, cs.CV, stat.ML)
Parameter Prediction for Unseen Deep Architectures
Authors: Boris Knyazev, Michal Drozdzal, Graham W. Taylor, Adriana Romero-Soriano
Abstract: Deep learning has been successful in automating the design of features in machine learning pipelines. However, the algorithms optimizing neural network parameters remain largely hand-designed and computationally inefficient. We study whether we can use deep learning to directly predict these parameters by exploiting the past knowledge of training other networks.
We introduce a large-scale dataset of diverse computational graphs of neural architectures, DeepNets-1M, and use it to explore parameter prediction on CIFAR-10 and ImageNet. By leveraging advances in graph neural networks, we propose a hypernetwork that can predict performant parameters in a single forward pass taking a fraction of a second, even on a CPU. The proposed model achieves surprisingly good performance on unseen and diverse networks. For example, it is able to predict all 24 million parameters of a ResNet-50, achieving 60% accuracy on CIFAR-10. On ImageNet, the top-5 accuracy of some of our networks approaches 50%. Our task, along with the model and results, can potentially lead to a new, more computationally efficient paradigm of training networks. Our model also learns a strong representation of neural architectures, enabling their analysis.
Submitted 25 October, 2021; originally announced October 2021.
Comments: NeurIPS 2021 camera ready; the code is available at https://github.com/facebookresearch/ppuda.

arXiv:2107.14280 [pdf] (cond-mat.mtrl-sci, cs.LG, physics.chem-ph)
Deciphering Cryptic Behavior in Bimetallic Transition Metal Complexes with Machine Learning
Authors: Michael G. Taylor, Aditya Nandy, Connie C. Lu, Heather J. Kulik
Abstract: The rational tailoring of transition metal complexes is necessary to address outstanding challenges in energy utilization and storage. Heterobimetallic transition metal complexes that exhibit metal-metal bonding in stacked "double decker" ligand structures are an emerging, attractive platform for catalysis, but their properties are challenging to predict prior to laborious synthetic efforts.
We demonstrate an alternative, data-driven approach to uncovering structure-property relationships for rational bimetallic complex design. We tailor graph-based representations of the metal-local environment for these heterobimetallic complexes for use in training multiple linear regression and kernel ridge regression (KRR) models. Focusing on oxidation potentials, we obtain a set of 28 experimentally characterized complexes to develop a multiple linear regression model. On this training set, we achieve good accuracy (mean absolute error, MAE, of 0.25 V) and preserve transferability to unseen experimental data with a new ligand structure. We train a KRR model on a subset of 330 structurally characterized heterobimetallics to predict the degree of metal-metal bonding. This KRR model predicts relative metal-metal bond lengths in the test set to within 5%, and analysis of key features reveals the fundamental atomic contributions (e.g., the valence electron configuration) that most strongly influence the behavior of complexes. Our work provides guidance for rational bimetallic design, suggesting that properties including the formal shortness ratio should be transferable from one period to another.
Submitted 29 July, 2021; originally announced July 2021.

arXiv:2106.13109 [pdf] (cond-mat.mtrl-sci, cs.LG, physics.chem-ph)
Machine learning to tame divergent density functional approximations: a new path to consensus materials design principles
Authors: Chenru Duan, Shuxin Chen, Michael G. Taylor, Fang Liu, Heather J. Kulik
Abstract: Computational virtual high-throughput screening (VHTS) with density functional theory (DFT) and machine learning (ML) acceleration is essential to rapid materials discovery. By necessity, efficient DFT-based workflows are carried out with a single density functional approximation (DFA). Nevertheless, properties evaluated with different DFAs can be expected to disagree in cases with challenging electronic structure (e.g., open-shell transition metal complexes, TMCs), for which rapid screening is most needed and accurate benchmarks are often unavailable. To quantify the effect of DFA bias, we introduce an approach to rapidly obtain property predictions from 23 representative DFAs spanning multiple families and "rungs" (e.g., semi-local to double hybrid) and basis sets on over 2,000 TMCs. Although computed properties (e.g., spin-state ordering and frontier orbital gap) naturally differ by DFA, high linear correlations persist across all DFAs. We train independent ML models for each DFA and observe convergent trends in feature importance; these features thus provide DFA-invariant, universal design rules. We devise a strategy to train ML models informed by all 23 DFAs and use them to predict properties (e.g., spin-splitting energy) of over 182k TMCs. By requiring consensus of the ANN-predicted DFA properties, we improve the correspondence of these computational lead compounds with literature-mined, experimental compounds over the single-DFA approach typically employed. Both feature analysis and consensus-based ML provide efficient, alternative paths to overcome the accuracy limitations of practical DFT.
Submitted 24 June, 2021; originally announced June 2021.

arXiv:2104.00670 [pdf, other] (cs.CV, cs.LG)
Unconstrained Scene Generation with Locally Conditioned Radiance Fields
Authors: Terrance DeVries, Miguel Angel Bautista, Nitish Srivastava, Graham W. Taylor, Joshua M. Susskind
Abstract: We tackle the challenge of learning a distribution over complex, realistic, indoor scenes. In this paper, we introduce Generative Scene Networks (GSN), which learns to decompose scenes into a collection of many local radiance fields that can be rendered from a freely moving camera. Our model can be used as a prior to generate new scenes, or to complete a scene given only sparse 2D observations. Recent work has shown that generative models of radiance fields can capture properties such as multi-view consistency and view-dependent lighting. However, these models are specialized for constrained viewing of single objects, such as cars or faces. Due to the size and complexity of realistic indoor environments, existing models lack the representational capacity to adequately capture them. Our decomposition scheme scales to larger and more complex scenes while preserving details and diversity, and the learned prior enables high-quality rendering from viewpoints that are significantly different from observed viewpoints. When compared to existing models, GSN produces quantitatively higher-quality scene renderings across several different scene datasets.
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2104.00670v1-abstract-full').style.display = 'none'; document.getElementById('2104.00670v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 1 April, 2021; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> April 2021. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2103.17105">arXiv:2103.17105</a> <span> [<a href="https://arxiv.org/pdf/2103.17105">pdf</a>, <a href="https://arxiv.org/format/2103.17105">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> The GIST and RIST of Iterative Self-Training for Semi-Supervised Segmentation </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Teh%2C+E+W">Eu Wern Teh</a>, <a href="/search/cs?searchtype=author&query=DeVries%2C+T">Terrance DeVries</a>, <a href="/search/cs?searchtype=author&query=Duke%2C+B">Brendan Duke</a>, <a href="/search/cs?searchtype=author&query=Jiang%2C+R">Ruowei Jiang</a>, <a href="/search/cs?searchtype=author&query=Aarabi%2C+P">Parham Aarabi</a>, <a href="/search/cs?searchtype=author&query=Taylor%2C+G+W">Graham W. Taylor</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2103.17105v3-abstract-short" style="display: inline;"> We consider the task of semi-supervised semantic segmentation, where we aim to produce pixel-wise semantic object masks given only a small number of human-labeled training examples. We focus on iterative self-training methods in which we explore the behavior of self-training over multiple refinement stages. We show that iterative self-training leads to performance degradation if done na茂vely with… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2103.17105v3-abstract-full').style.display = 'inline'; document.getElementById('2103.17105v3-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2103.17105v3-abstract-full" style="display: none;"> We consider the task of semi-supervised semantic segmentation, where we aim to produce pixel-wise semantic object masks given only a small number of human-labeled training examples. We focus on iterative self-training methods in which we explore the behavior of self-training over multiple refinement stages. We show that iterative self-training leads to performance degradation if done na茂vely with a fixed ratio of human-labeled to pseudo-labeled training examples. We propose Greedy Iterative Self-Training (GIST) and Random Iterative Self-Training (RIST) strategies that alternate between training on either human-labeled data or pseudo-labeled data at each refinement stage, resulting in a performance boost rather than degradation. We further show that GIST and RIST can be combined with existing semi-supervised learning methods to boost performance. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2103.17105v3-abstract-full').style.display = 'none'; document.getElementById('2103.17105v3-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 28 April, 2022; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 31 March, 2021; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> March 2021. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">To appear in the Conference on Computer and Robot Vision (CRV), 2022</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2103.03891">arXiv:2103.03891</a> <span> [<a href="https://arxiv.org/pdf/2103.03891">pdf</a>, <a href="https://arxiv.org/format/2103.03891">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> LOHO: Latent Optimization of Hairstyles via Orthogonalization </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Saha%2C+R">Rohit Saha</a>, <a href="/search/cs?searchtype=author&query=Duke%2C+B">Brendan Duke</a>, <a href="/search/cs?searchtype=author&query=Shkurti%2C+F">Florian Shkurti</a>, <a href="/search/cs?searchtype=author&query=Taylor%2C+G+W">Graham W. Taylor</a>, <a href="/search/cs?searchtype=author&query=Aarabi%2C+P">Parham Aarabi</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2103.03891v2-abstract-short" style="display: inline;"> Hairstyle transfer is challenging due to hair structure differences in the source and target hair. Therefore, we propose Latent Optimization of Hairstyles via Orthogonalization (LOHO), an optimization-based approach using GAN inversion to infill missing hair structure details in latent space during hairstyle transfer. Our approach decomposes hair into three attributes: perceptual structure, appear… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2103.03891v2-abstract-full').style.display = 'inline'; document.getElementById('2103.03891v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2103.03891v2-abstract-full" style="display: none;"> Hairstyle transfer is challenging due to hair structure differences in the source and target hair. Therefore, we propose Latent Optimization of Hairstyles via Orthogonalization (LOHO), an optimization-based approach using GAN inversion to infill missing hair structure details in latent space during hairstyle transfer. Our approach decomposes hair into three attributes: perceptual structure, appearance, and style, and includes tailored losses to model each of these attributes independently. Furthermore, we propose two-stage optimization and gradient orthogonalization to enable disentangled latent space optimization of our hair attributes. 
Using LOHO for latent-space manipulation, users can synthesize novel photorealistic images by manipulating hair attributes either individually or jointly, transferring the desired attributes from reference hairstyles. LOHO achieves a superior FID compared with the current state of the art (SOTA) for hairstyle transfer. Additionally, LOHO preserves the subject's identity comparably well according to PSNR and SSIM when compared to SOTA image embedding pipelines. Code is available at https://github.com/dukebw/LOHO.
Submitted 10 March, 2021; v1 submitted 5 March, 2021; originally announced March 2021.
Comments: CVPR 2021.

arXiv:2101.08833 [pdf, other] (cs.CV)
SSTVOS: Sparse Spatiotemporal Transformers for Video Object Segmentation
Authors: Brendan Duke, Abdalla Ahmed, Christian Wolf, Parham Aarabi, Graham W. Taylor
Abstract: In this paper we introduce a Transformer-based approach to video object segmentation (VOS). To address compounding error and scalability issues of prior work, we propose a scalable, end-to-end method for VOS called Sparse Spatiotemporal Transformers (SST).
SST extracts per-pixel representations for each object in a video using sparse attention over spatiotemporal features. Our attention-based formulation for VOS allows a model to learn to attend over a history of multiple frames and provides suitable inductive bias for performing correspondence-like computations necessary for solving motion segmentation. We demonstrate the effectiveness of attention-based over recurrent networks in the spatiotemporal domain. Our method achieves competitive results on YouTube-VOS and DAVIS 2017 with improved scalability and robustness to occlusions compared with the state of the art. Code is available at https://github.com/dukebw/SSTVOS.
Submitted 28 March, 2021; v1 submitted 21 January, 2021; originally announced January 2021.
Comments: CVPR 2021 (Oral).

arXiv:2101.07922 [pdf, other] (cs.CV, cs.CR, cs.LG)
LowKey: Leveraging Adversarial Attacks to Protect Social Media Users from Facial Recognition
Authors: Valeriia Cherepanova, Micah Goldblum, Harrison Foley, Shiyuan Duan, John Dickerson, Gavin Taylor, Tom Goldstein
Abstract: Facial recognition systems are increasingly deployed by private corporations, government agencies, and contractors for consumer services and mass surveillance programs alike. These systems are typically built by scraping social media profiles for user images. Adversarial perturbations have been proposed for bypassing facial recognition systems.

arXiv:2101.07922 [pdf, other] cs.CV cs.CR cs.LG
LowKey: Leveraging Adversarial Attacks to Protect Social Media Users from Facial Recognition
Authors: Valeriia Cherepanova, Micah Goldblum, Harrison Foley, Shiyuan Duan, John Dickerson, Gavin Taylor, Tom Goldstein
Abstract: Facial recognition systems are increasingly deployed by private corporations, government agencies, and contractors for consumer services and mass surveillance programs alike. These systems are typically built by scraping social media profiles for user images. Adversarial perturbations have been proposed for bypassing facial recognition systems. However, existing methods fail on full-scale systems and commercial APIs. We develop our own adversarial filter that accounts for the entire image processing pipeline and is demonstrably effective against industrial-grade pipelines that include face detection and large scale databases. Additionally, we release an easy-to-use webtool that significantly degrades the accuracy of Amazon Rekognition and the Microsoft Azure Face Recognition API, reducing the accuracy of each to below 1%.
Submitted 24 January, 2021; v1 submitted 19 January, 2021; originally announced January 2021.
Comments: Published as a conference paper at ICLR 2021
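
Stripped to essentials, the underlying mechanism is projected gradient ascent on an embedding distance; `embed` is a hypothetical feature extractor, and the real attack additionally models detection and preprocessing, which this sketch omits:

import torch

def protect(image, embed, eps=8 / 255, steps=40, alpha=1 / 255):
    """Perturb `image` so its face embedding moves far from the original,
    while keeping the perturbation within an L-infinity ball of radius eps."""
    clean_feat = embed(image).detach()
    delta = torch.zeros_like(image, requires_grad=True)
    for _ in range(steps):
        dist = (embed(image + delta) - clean_feat).norm()
        dist.backward()
        with torch.no_grad():
            delta += alpha * delta.grad.sign()   # ascend on embedding distance
            delta.clamp_(-eps, eps)              # keep perturbation inconspicuous
            delta.grad.zero_()
    return (image + delta).clamp(0, 1).detach()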

arXiv:2012.11543 [pdf, other] cs.AI cs.LG
Building LEGO Using Deep Generative Models of Graphs
Authors: Rylee Thompson, Elahe Ghalebi, Terrance DeVries, Graham W. Taylor
Abstract: Generative models are now used to create a variety of high-quality digital artifacts. Yet their use in designing physical objects has received far less attention. In this paper, we advocate for the construction toy, LEGO, as a platform for developing generative models of sequential assembly. We develop a generative model based on graph-structured neural networks that can learn from human-built structures and produce visually compelling designs. Our code is released at: https://github.com/uoguelph-mlrg/GenerativeLEGO.
Submitted 21 December, 2020; originally announced December 2020.
Comments: NeurIPS 2020 ML4eng workshop paper
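
A toy autoregressive graph builder in this spirit, where each new brick is attached to the existing structure one step at a time; `score_connections` stands in for a learned graph neural network and is not the paper's model:

import torch

def build_graph(score_connections, node_feats, num_steps):
    """Greedily grow an edge list: `score_connections(nodes)` returns one
    score per existing node for attaching the next node there."""
    nodes = [node_feats[0]]          # seed the structure with a first brick
    edges = []
    for t in range(1, num_steps):
        scores = score_connections(torch.stack(nodes))  # (len(nodes),)
        parent = int(scores.argmax())                   # pick attachment point
        nodes.append(node_feats[t])
        edges.append((parent, t))
    return edges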

arXiv:2011.06188 [pdf, other] cs.LG cs.NE
Evaluating Curriculum Learning Strategies in Neural Combinatorial Optimization
Authors: Michal Lisicki, Arash Afkanpour, Graham W. Taylor
Abstract: Neural combinatorial optimization (NCO) aims at designing problem-independent and efficient neural network-based strategies for solving combinatorial problems. The field recently experienced growth by successfully adapting architectures originally designed for machine translation. Even though the results are promising, a large gap still exists between NCO models and classic deterministic solvers, both in terms of accuracy and efficiency. One of the drawbacks of current approaches is the inefficiency of training on multiple problem sizes. Curriculum learning strategies have been shown helpful in increasing performance in the multi-task setting. In this work, we focus on designing a curriculum learning-based training procedure that can help existing architectures achieve competitive performance on a large range of problem sizes simultaneously. We provide a systematic investigation of several training procedures and use the insights gained to motivate application of a psychologically-inspired approach to improve upon the classic curriculum method.
Submitted 11 November, 2020; originally announced November 2020.
Comments: Presented at Workshop on Learning Meets Combinatorial Algorithms at NeurIPS 2020
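
A minimal example of a size-based curriculum of the general kind discussed here; the linear schedule is an illustrative default, not one of the strategies evaluated in the paper:

import random

def sample_size(step, total_steps, min_size=10, max_size=100):
    """Sample a problem size whose upper bound grows over training,
    so early batches are dominated by small, easy instances."""
    progress = step / total_steps
    cap = min_size + int(progress * (max_size - min_size))
    return random.randint(min_size, max(cap, min_size))

# Instance sizes grow on average as training progresses.
sizes = [sample_size(s, 1000) for s in range(1000)]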

arXiv:2011.03043 [pdf, other] cs.LG cs.AI cs.CV
Identifying and interpreting tuning dimensions in deep networks
Authors: Nolan S. Dey, J. Eric Taylor, Bryan P. Tripp, Alexander Wong, Graham W. Taylor
Abstract: In neuroscience, a tuning dimension is a stimulus attribute that accounts for much of the activation variance of a group of neurons. These are commonly used to decipher the responses of such groups. While researchers have attempted to manually identify an analogue to these tuning dimensions in deep neural networks, we are unaware of an automatic way to discover them. This work contributes an unsupervised framework for identifying and interpreting "tuning dimensions" in deep networks. Our method correctly identifies the tuning dimensions of a synthetic Gabor filter bank and tuning dimensions of the first two layers of InceptionV1 trained on ImageNet.
Submitted 7 December, 2020; v1 submitted 5 November, 2020; originally announced November 2020.
Comments: 15 pages, 12 figures, Camera-ready for Shared Visual Representations in Human & Machine Intelligence NeurIPS Workshop 2020
ACM Class: I.2.10
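
One simple unsupervised recipe in this spirit is to extract the principal axes of activation variance for a group of units; the abstract does not specify the method, so the PCA below is only illustrative:

import numpy as np

def tuning_dimensions(activations, k=2):
    """activations: (num_stimuli, num_units) responses of a unit group.
    Returns the top-k directions of activation variance as candidate
    "tuning dimensions"."""
    centered = activations - activations.mean(axis=0)
    # Right singular vectors = eigenvectors of the covariance matrix.
    _, _, vt = np.linalg.svd(centered, full_matrices=False)
    return vt[:k]                     # (k, num_units)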

arXiv:2010.09891 [pdf, other] cs.LG stat.ML
Robust Optimization as Data Augmentation for Large-scale Graphs
Authors: Kezhi Kong, Guohao Li, Mucong Ding, Zuxuan Wu, Chen Zhu, Bernard Ghanem, Gavin Taylor, Tom Goldstein
Abstract: Data augmentation helps neural networks generalize better by enlarging the training set, but it remains an open question how to effectively augment graph data to enhance the performance of GNNs (Graph Neural Networks). While most existing graph regularizers focus on manipulating graph topological structures by adding/removing edges, we offer a method to augment node features for better performance. We propose FLAG (Free Large-scale Adversarial Augmentation on Graphs), which iteratively augments node features with gradient-based adversarial perturbations during training. By making the model invariant to small fluctuations in input data, our method helps models generalize to out-of-distribution samples and boosts model performance at test time.
FLAG is a general-purpose approach for graph data, which universally works in node classification, link prediction, and graph classification tasks. FLAG is also highly flexible and scalable, and is deployable with arbitrary GNN backbones and large-scale datasets. We demonstrate the efficacy and stability of our method through extensive experiments and ablation studies. We also provide intuitive observations for a deeper understanding of our method.
Submitted 29 March, 2022; v1 submitted 19 October, 2020; originally announced October 2020.
Comments: Accepted at CVPR 2022
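
The core mechanism, iteratively perturbing node features with adversarial gradient steps during training, can be sketched as follows; the loop structure and step sizes are illustrative defaults rather than the paper's exact recipe:

import torch

def flag_step(model, x, y, loss_fn, opt, ascent_steps=3, step_size=1e-3):
    """One training step with gradient-based adversarial augmentation of
    the node features x."""
    perturb = torch.zeros_like(x).uniform_(-step_size, step_size).requires_grad_()
    for _ in range(ascent_steps):                     # inner maximization
        loss = loss_fn(model(x + perturb), y)
        grad = torch.autograd.grad(loss, perturb)[0]
        with torch.no_grad():                         # ascend on the perturbation
            perturb = perturb + step_size * grad.sign()
        perturb.requires_grad_()
    opt.zero_grad()
    loss_fn(model(x + perturb), y).backward()         # outer minimization
    opt.step()                                        # update model weights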

arXiv:2010.04112 [pdf, other] cs.LG cs.AI, doi: 10.1145/3410992.3411001
Information-Driven Adaptive Sensing Based on Deep Reinforcement Learning
Authors: Abdulmajid Murad, Frank Alexander Kraemer, Kerstin Bach, Gavin Taylor
Abstract: In order to make better use of deep reinforcement learning in the creation of sensing policies for resource-constrained IoT devices, we present and study a novel reward function based on the Fisher information value. This reward function enables IoT sensor devices to learn to spend available energy on measurements at otherwise unpredictable moments, while conserving energy at times when measurements would provide little new information. This is a highly general approach, which allows for a wide range of use cases without significant human design effort or hyper-parameter tuning. We illustrate the approach in a scenario of workplace noise monitoring, where results show that the learned behavior outperforms a uniform sampling strategy and comes close to a near-optimal oracle solution.
Submitted 8 October, 2020; originally announced October 2020.
Comments: 8 pages, 8 figures
Journal ref: 10th International Conference on the Internet of Things (IoT20), October 6-9, 2020, Malmo, Sweden
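
A hypothetical shaping of such a reward for a Gaussian predictive model, where the Fisher information a sample carries about the mean is 1/sigma^2; the paper's exact reward definition may differ:

def fisher_reward(pred_std, energy_cost, took_measurement):
    """Reward measuring when the forecast is most uncertain: high predictive
    std => high information gain; skipping a measurement saves energy."""
    if not took_measurement:
        return 0.0                        # conserve energy, gain no information
    information = 1.0 / (pred_std ** 2)   # Fisher information of the sample
    return information - energy_cost      # trade information against energy

# An agent maximizing this learns to measure at unpredictable moments.
print(fisher_reward(pred_std=0.2, energy_cost=1.0, took_measurement=True))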

arXiv:2009.02276 [pdf, other] cs.CV cs.LG
Witches' Brew: Industrial Scale Data Poisoning via Gradient Matching
Authors: Jonas Geiping, Liam Fowl, W. Ronny Huang, Wojciech Czaja, Gavin Taylor, Michael Moeller, Tom Goldstein
Abstract: Data poisoning attacks modify training data to maliciously control a model trained on such data. In this work, we focus on targeted poisoning attacks which cause a reclassification of an unmodified test image and as such breach model integrity. We consider a particularly malicious poisoning attack that is both "from scratch" and "clean label", meaning we analyze an attack that successfully works against new, randomly initialized models, and is nearly imperceptible to humans, all while perturbing only a small fraction of the training data. Previous poisoning attacks against deep neural networks in this setting have been limited in scope and success, working only in simplified settings or being prohibitively expensive for large datasets. The central mechanism of the new attack is matching the gradient direction of malicious examples. We analyze why this works, supplement with practical considerations, and show its threat to real-world practitioners, finding that it is the first poisoning method to cause targeted misclassification in modern deep networks trained from scratch on a full-sized, poisoned ImageNet dataset. Finally, we demonstrate the limitations of existing defensive strategies against such an attack, concluding that data poisoning is a credible threat, even for large-scale deep learning systems.
Submitted 10 May, 2021; v1 submitted 4 September, 2020; originally announced September 2020.
Comments: First two authors contributed equally. Last two authors contributed equally. 21 pages, 11 figures. Published at ICLR 2021
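
The central objective, matching the gradient direction of the poisoned batch to the gradient of the attacker's target loss, can be sketched as a cosine-similarity loss; the function and argument names are illustrative:

import torch

def gradient_matching_loss(model, loss_fn, poisons, poison_labels,
                           target, adversarial_label):
    """Loss that is small when training on `poisons` pushes the model in
    the same parameter direction as misclassifying `target`."""
    params = [p for p in model.parameters() if p.requires_grad]
    # Gradient the attacker wants training to follow (treated as constant):
    target_grad = torch.autograd.grad(
        loss_fn(model(target), adversarial_label), params)
    # Gradient the poisoned batch actually produces (differentiable wrt poisons):
    poison_grad = torch.autograd.grad(
        loss_fn(model(poisons), poison_labels), params, create_graph=True)
    sims = [torch.nn.functional.cosine_similarity(a.flatten(), b.flatten(), dim=0)
            for a, b in zip(poison_grad, target_grad)]
    return 1 - torch.stack(sims).mean()   # minimize => align gradient directions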

arXiv:2007.15255 [pdf, other] cs.CV cs.LG stat.ML
Instance Selection for GANs
Authors: Terrance DeVries, Michal Drozdzal, Graham W. Taylor
Abstract: Recent advances in Generative Adversarial Networks (GANs) have led to their widespread adoption for the purposes of generating high quality synthetic imagery. While capable of generating photo-realistic images, these models often produce unrealistic samples which fall outside of the data manifold. Several recently proposed techniques attempt to avoid spurious samples, either by rejecting them after generation, or by truncating the model's latent space. While effective, these methods are inefficient, as a large fraction of training time and model capacity are dedicated towards samples that will ultimately go unused. In this work we propose a novel approach to improve sample quality: altering the training dataset via instance selection before model training has taken place. By refining the empirical data distribution before training, we redirect model capacity towards high-density regions, which ultimately improves sample fidelity, lowers model capacity requirements, and significantly reduces training time. Code is available at https://github.com/uoguelph-mlrg/instance_selection_for_gans.
Submitted 23 October, 2020; v1 submitted 30 July, 2020; originally announced July 2020.
Comments: Accepted to NeurIPS 2020
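
One plausible instantiation of "redirecting capacity towards high-density regions" is to score each sample by a density proxy in an embedding space and keep the densest fraction; the k-NN proxy below is an assumption, not necessarily the paper's scoring function:

import numpy as np

def select_instances(embeddings, keep_frac=0.5, k=5):
    """Keep the fraction of samples whose embedding neighborhoods are
    densest, using k-th nearest-neighbor distance as a density proxy."""
    d = np.linalg.norm(embeddings[:, None] - embeddings[None, :], axis=-1)
    knn_dist = np.sort(d, axis=1)[:, k]     # small k-NN distance = high density
    n_keep = int(len(embeddings) * keep_frac)
    return np.argsort(knn_dist)[:n_keep]    # indices of retained samples

kept = select_instances(np.random.randn(100, 16))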

arXiv:2007.05756 [pdf, other] cs.CV cs.LG stat.ML
Generative Compositional Augmentations for Scene Graph Prediction
Authors: Boris Knyazev, Harm de Vries, Cătălina Cangea, Graham W. Taylor, Aaron Courville, Eugene Belilovsky
Abstract: Inferring objects and their relationships from an image in the form of a scene graph is useful in many applications at the intersection of vision and language. We consider a challenging problem of compositional generalization that emerges in this task due to a long tail data distribution. Current scene graph generation models are trained on a tiny fraction of the distribution corresponding to the most frequent compositions, e.g. <cup, on, table>.
However, test images might contain zero- and few-shot compositions of objects and relationships, e.g. <cup, on, surfboard>. Despite each of the object categories and the predicate (e.g. 'on') being frequent in the training data, the models often fail to properly understand such unseen or rare compositions. To improve generalization, it is natural to attempt increasing the diversity of the training distribution. However, in the graph domain this is non-trivial. To that end, we propose a method to synthesize rare yet plausible scene graphs by perturbing real ones. We then propose and empirically study a model based on conditional generative adversarial networks (GANs) that allows us to generate visual features of perturbed scene graphs and learn from them in a joint fashion. When evaluated on the Visual Genome dataset, our approach yields marginal, but consistent improvements in zero- and few-shot metrics. We analyze the limitations of our approach indicating promising directions for future research.
Submitted 1 October, 2021; v1 submitted 11 July, 2020; originally announced July 2020.
Comments: ICCV 2021 camera ready. Added more baselines, combining GANs with Neural Motifs and t-sne visualizations.
Code is available at https://github.com/bknyaz/sgg
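
A toy version of the perturbation idea, resampling one object category in a (subject, predicate, object) triplet to form a rare but plausible composition; the category lists and similarity rule are placeholders for the paper's learned, plausibility-aware procedure:

import random

# Placeholder map of semantically similar object categories.
SIMILAR = {"table": ["surfboard", "bench", "counter"],
           "cup": ["bowl", "bottle", "vase"]}

def perturb_triplet(triplet):
    """Swap the object slot for a similar category, e.g.
    <cup, on, table> -> <cup, on, surfboard>."""
    subj, pred, obj = triplet
    if obj in SIMILAR:
        obj = random.choice(SIMILAR[obj])
    return (subj, pred, obj)

print(perturb_triplet(("cup", "on", "table")))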