Search | arXiv e-print repository

<!DOCTYPE html> <html lang="en"> <head> <meta charset="utf-8"/> <meta name="viewport" content="width=device-width, initial-scale=1"/>  <link rel="apple-touch-icon" sizes="180x180" href="https://static.arxiv.org/static/base/1.0.0a5/images/icons/apple-touch-icon.png"> <link rel="icon" type="image/png" sizes="32x32" href="https://static.arxiv.org/static/base/1.0.0a5/images/icons/favicon-32x32.png"> <link rel="icon" type="image/png" sizes="16x16" href="https://static.arxiv.org/static/base/1.0.0a5/images/icons/favicon-16x16.png"> <link rel="manifest" href="https://static.arxiv.org/static/base/1.0.0a5/images/icons/site.webmanifest"> <link rel="mask-icon" href="https://static.arxiv.org/static/base/1.0.0a5/images/icons/safari-pinned-tab.svg" color="#b31b1b"> <link rel="shortcut icon" href="https://static.arxiv.org/static/base/1.0.0a5/images/icons/favicon.ico"> <meta name="msapplication-TileColor" content="#b31b1b"> <meta name="msapplication-config" content="images/icons/browserconfig.xml"> <meta name="theme-color" content="#b31b1b">  <title>Search | arXiv e-print repository</title> <script defer src="https://static.arxiv.org/static/base/1.0.0a5/fontawesome-free-5.11.2-web/js/all.js"></script> <link rel="stylesheet" href="https://static.arxiv.org/static/base/1.0.0a5/css/arxivstyle.css" /> <script type="text/x-mathjax-config"> MathJax.Hub.Config({ messageStyle: "none", extensions: ["tex2jax.js"], jax: ["input/TeX", "output/HTML-CSS"], tex2jax: { inlineMath: [ ['$','$'], ["\$","\$"] ], displayMath: [ ['$$','$$'], ["\\[","\\]"] ], processEscapes: true, ignoreClass: '.*', processClass: 'mathjax.*' }, TeX: { extensions: ["AMSmath.js", "AMSsymbols.js", "noErrors.js"], noErrors: { inlineDelimiters: ["$","$"], multiLine: false, style: { "font-size": "normal", "border": "" } } }, "HTML-CSS": { availableFonts: ["TeX"] } }); </script> <script src='//static.arxiv.org/MathJax-2.7.3/MathJax.js'></script> <script src="https://static.arxiv.org/static/base/1.0.0a5/js/notification.js"></script> <link rel="stylesheet" href="https://static.arxiv.org/static/search/0.5.6/css/bulma-tooltip.min.css" /> <link rel="stylesheet" href="https://static.arxiv.org/static/search/0.5.6/css/search.css" /> <script src="https://code.jquery.com/jquery-3.2.1.slim.min.js" integrity="sha256-k2WSCIexGzOj3Euiig+TlR8gA0EmPjuc79OEeY5L45g=" crossorigin="anonymous"></script> <script src="https://static.arxiv.org/static/search/0.5.6/js/fieldset.js"></script> <style> radio#cf-customfield_11400 { display: none; } </style> </head> <body> <header><a href="#main-container" class="is-sr-only">Skip to main content</a>  <div class="attribution level is-marginless" role="banner"> <div class="level-left"> <a class="level-item" href="https://cornell.edu/"><img src="https://static.arxiv.org/static/base/1.0.0a5/images/cornell-reduced-white-SMALL.svg" alt="Cornell University" width="200" aria-label="logo" /></a> </div> <div class="level-right is-marginless"><p class="sponsors level-item is-marginless"><span id="support-ack-url">We gratefully acknowledge support from<br /> the Simons Foundation, <a href="https://info.arxiv.org/about/ourmembers.html">member institutions</a>, and all contributors. <a href="https://info.arxiv.org/about/donate.html">Donate</a></span></p></div> </div>  <div class="identity level is-marginless"> <div class="level-left"> <div class="level-item"> <a class="arxiv" href="https://arxiv.org/" aria-label="arxiv-logo"> <img src="https://static.arxiv.org/static/base/1.0.0a5/images/arxiv-logo-one-color-white.svg" aria-label="logo" alt="arxiv logo" width="85" style="width:85px;"/> </a> </div> </div> <div class="search-block level-right"> <form class="level-item mini-search" method="GET" action="https://arxiv.org/search"> <div class="field has-addons"> <div class="control"> <input class="input is-small" type="text" name="query" placeholder="Search..." aria-label="Search term or terms" /> <p class="help"><a href="https://info.arxiv.org/help">Help</a> | <a href="https://arxiv.org/search/advanced">Advanced Search</a></p> </div> <div class="control"> <div class="select is-small"> <select name="searchtype" aria-label="Field to search"> <option value="all" selected="selected">All fields</option> <option value="title">Title</option> <option value="author">Author</option> <option value="abstract">Abstract</option> <option value="comments">Comments</option> <option value="journal_ref">Journal reference</option> <option value="acm_class">ACM classification</option> <option value="msc_class">MSC classification</option> <option value="report_num">Report number</option> <option value="paper_id">arXiv identifier</option> <option value="doi">DOI</option> <option value="orcid">ORCID</option> <option value="author_id">arXiv author ID</option> <option value="help">Help pages</option> <option value="full_text">Full text</option> </select> </div> </div> <input type="hidden" name="source" value="header"> <button class="button is-small is-cul-darker">Search</button> </div> </form> </div> </div>  <div class="container"> <div class="user-tools is-size-7 has-text-right has-text-weight-bold" role="navigation" aria-label="User menu"> <a href="https://arxiv.org/login">Login</a> </div> </div> </header> <main class="container" id="main-container"> <div class="level is-marginless"> <div class="level-left"> <h1 class="title is-clearfix"> Showing 1–49 of 49 results for author: <span class="mathjax">Ahuja, A</span> </h1> </div> <div class="level-right is-hidden-mobile">  <span class="help" style="display: inline-block;"><a href="https://github.com/arXiv/arxiv-search/releases">Search v0.5.6 released 2020-02-24</a>  </span> </div> </div> <div class="content"> <form method="GET" action="/search/cs" aria-role="search"> Searching in archive <strong>cs</strong>. <a href="/search/?searchtype=author&query=Ahuja%2C+A">Search in all archives.</a> <div class="field has-addons-tablet"> <div class="control is-expanded"> <label for="query" class="hidden-label">Search term or terms</label> <input class="input is-medium" id="query" name="query" placeholder="Search term..." type="text" value="Ahuja, A"> </div> <div class="select control is-medium"> <label class="is-hidden" for="searchtype">Field</label> <select class="is-medium" id="searchtype" name="searchtype"><option value="all">All fields</option><option value="title">Title</option><option selected value="author">Author(s)</option><option value="abstract">Abstract</option><option value="comments">Comments</option><option value="journal_ref">Journal reference</option><option value="acm_class">ACM classification</option><option value="msc_class">MSC classification</option><option value="report_num">Report number</option><option value="paper_id">arXiv identifier</option><option value="doi">DOI</option><option value="orcid">ORCID</option><option value="license">License (URI)</option><option value="author_id">arXiv author ID</option><option value="help">Help pages</option><option value="full_text">Full text</option></select> </div> <div class="control"> <button class="button is-link is-medium">Search</button> </div> </div> <div class="field"> <div class="control is-size-7"> <label class="radio"> <input checked id="abstracts-0" name="abstracts" type="radio" value="show"> Show abstracts </label> <label class="radio"> <input id="abstracts-1" name="abstracts" type="radio" value="hide"> Hide abstracts </label> </div> </div> <div class="is-clearfix" style="height: 2.5em"> <div class="is-pulled-right"> <a href="/search/advanced?terms-0-term=Ahuja%2C+A&terms-0-field=author&size=50&order=-announced_date_first">Advanced Search</a> </div> </div> <input type="hidden" name="order" value="-announced_date_first"> <input type="hidden" name="size" value="50"> </form> <div class="level breathe-horizontal"> <div class="level-left"> <form method="GET" action="/search/"> <div style="display: none;"> <select id="searchtype" name="searchtype"><option value="all">All fields</option><option value="title">Title</option><option selected value="author">Author(s)</option><option value="abstract">Abstract</option><option value="comments">Comments</option><option value="journal_ref">Journal reference</option><option value="acm_class">ACM classification</option><option value="msc_class">MSC classification</option><option value="report_num">Report number</option><option value="paper_id">arXiv identifier</option><option value="doi">DOI</option><option value="orcid">ORCID</option><option value="license">License (URI)</option><option value="author_id">arXiv author ID</option><option value="help">Help pages</option><option value="full_text">Full text</option></select> <input id="query" name="query" type="text" value="Ahuja, A"> <ul id="abstracts"><li><input checked id="abstracts-0" name="abstracts" type="radio" value="show"> <label for="abstracts-0">Show abstracts</label></li><li><input id="abstracts-1" name="abstracts" type="radio" value="hide"> <label for="abstracts-1">Hide abstracts</label></li></ul> </div> <div class="box field is-grouped is-grouped-multiline level-item"> <div class="control"> <span class="select is-small"> <select id="size" name="size"><option value="25">25</option><option selected value="50">50</option><option value="100">100</option><option value="200">200</option></select> </span> <label for="size">results per page</label>. </div> <div class="control"> <label for="order">Sort results by</label> <span class="select is-small"> <select id="order" name="order"><option selected value="-announced_date_first">Announcement date (newest first)</option><option value="announced_date_first">Announcement date (oldest first)</option><option value="-submitted_date">Submission date (newest first)</option><option value="submitted_date">Submission date (oldest first)</option><option value="">Relevance</option></select> </span> </div> <div class="control"> <button class="button is-small is-link">Go</button> </div> </div> </form> </div> </div> <ol class="breathe-horizontal" start="1"> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2411.04160">arXiv:2411.04160</a> <span> [<a href="https://arxiv.org/pdf/2411.04160">pdf</a>, <a href="https://arxiv.org/format/2411.04160">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Networking and Internet Architecture">cs.NI</span> </div> </div> <p class="title is-5 mathjax"> Topology Bench: Systematic Graph Based Benchmarking for Core Optical Networks </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Matzner%2C+R">Robin Matzner</a>, <a href="/search/cs?searchtype=author&query=Ahuja%2C+A">Akanksha Ahuja</a>, <a href="/search/cs?searchtype=author&query=Sadeghi%2C+R">Rasoul Sadeghi</a>, <a href="/search/cs?searchtype=author&query=Doherty%2C+M">Michael Doherty</a>, <a href="/search/cs?searchtype=author&query=Beghelli%2C+A">Alejandra Beghelli</a>, <a href="/search/cs?searchtype=author&query=Savory%2C+S+J">Seb J. Savory</a>, <a href="/search/cs?searchtype=author&query=Bayvel%2C+P">Polina Bayvel</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2411.04160v1-abstract-short" style="display: inline;"> Topology Bench is a comprehensive topology dataset designed to accelerate benchmarking studies in optical networks. The dataset, focusing on core optical networks, comprises publicly accessible and ready-to-use topologies, including (a) 105 georeferenced real-world optical networks and (b) 270,900 validated synthetic topologies. Prior research on real-world core optical networks has been character… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.04160v1-abstract-full').style.display = 'inline'; document.getElementById('2411.04160v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2411.04160v1-abstract-full" style="display: none;"> Topology Bench is a comprehensive topology dataset designed to accelerate benchmarking studies in optical networks. The dataset, focusing on core optical networks, comprises publicly accessible and ready-to-use topologies, including (a) 105 georeferenced real-world optical networks and (b) 270,900 validated synthetic topologies. Prior research on real-world core optical networks has been characterised by fragmented open data sources and disparate individual studies. Moreover, previous efforts have notably failed to provide synthetic data at a scale comparable to our present study. Topology Bench addresses this limitation, offering a unified resource and represents a 61.5% increase in spatially-referenced real world optical networks. To benchmark and identify the fundamental nature of optical network topologies through the lens of graph-theoretical analysis, we analyse both real and synthetic networks using structural, spatial and spectral metrics. Our comparative analysis identifies constraints in real optical network diversity and illustrates how synthetic networks can complement and expand the range of topologies available for use. Currently, topologies are selected based on subjective criteria, such as preference, data availability, or perceived suitability, leading to potential biases and limited representativeness. Our framework enhances the generalisability of optical network research by providing a more objective and systematic approach to topology selection. A statistical and correlation analysis reveals the quantitative range of all of these graph metrics and the relationships between them. Finally, we apply unsupervised machine learning to cluster real-world topologies into distinctive groups using nine optimal graph metrics using K-means. We conclude the analysis by providing guidance on how to use such clusters to select a diverse set of topologies for future studies. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.04160v1-abstract-full').style.display = 'none'; document.getElementById('2411.04160v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 6 November, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> November 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2409.10537">arXiv:2409.10537</a> <span> [<a href="https://arxiv.org/pdf/2409.10537">pdf</a>, <a href="https://arxiv.org/format/2409.10537">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Human-Computer Interaction">cs.HC</span> </div> </div> <p class="title is-5 mathjax"> Practical Challenges of Progressive Data Science in Healthcare </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Roshan%2C+F+Z">Faisal Zaki Roshan</a>, <a href="/search/cs?searchtype=author&query=Ahuja%2C+A">Abhishek Ahuja</a>, <a href="/search/cs?searchtype=author&query=Rajabiyazdi%2C+F">Fateme Rajabiyazdi</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2409.10537v1-abstract-short" style="display: inline;"> The healthcare system collects extensive data, encompassing patient administrative information, clinical measurements, and home-monitored health metrics. To support informed decision-making in patient care and treatment management, it is essential to review and analyze these diverse data sources. Data visualization is a promising solution to navigate healthcare datasets, uncover hidden patterns, a… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2409.10537v1-abstract-full').style.display = 'inline'; document.getElementById('2409.10537v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2409.10537v1-abstract-full" style="display: none;"> The healthcare system collects extensive data, encompassing patient administrative information, clinical measurements, and home-monitored health metrics. To support informed decision-making in patient care and treatment management, it is essential to review and analyze these diverse data sources. Data visualization is a promising solution to navigate healthcare datasets, uncover hidden patterns, and derive actionable insights. However, the process of creating interactive data visualization can be rather challenging due to the size and complexity of these datasets. Progressive data science offers a potential solution, enabling interaction with intermediate results during data exploration. In this paper, we reflect on our experiences with three health data visualization projects employing a progressive data science approach. We explore the practical implications and challenges faced at various stages, including data selection, pre-processing, data mining, transformation, and interpretation and evaluation. We highlighted unique challenges and opportunities for three projects, including visualizing surgical outcomes, tracking patient bed transfers, and integrating patient-generated data visualizations into the healthcare setting. We identified the following challenges: inconsistent data collection practices, the complexity of adapting to varying data completeness levels, and the need to modify designs for real-world deployment. Our findings underscore the need for careful consideration of using a progressive data science approach when designing visualizations for healthcare settings. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2409.10537v1-abstract-full').style.display = 'none'; document.getElementById('2409.10537v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 31 August, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> September 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">4 pages</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2409.01369">arXiv:2409.01369</a> <span> [<a href="https://arxiv.org/pdf/2409.01369">pdf</a>, <a href="https://arxiv.org/format/2409.01369">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">stat.ML</span> </div> </div> <p class="title is-5 mathjax"> Imitating Language via Scalable Inverse Reinforcement Learning </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Wulfmeier%2C+M">Markus Wulfmeier</a>, <a href="/search/cs?searchtype=author&query=Bloesch%2C+M">Michael Bloesch</a>, <a href="/search/cs?searchtype=author&query=Vieillard%2C+N">Nino Vieillard</a>, <a href="/search/cs?searchtype=author&query=Ahuja%2C+A">Arun Ahuja</a>, <a href="/search/cs?searchtype=author&query=Bornschein%2C+J">Jorg Bornschein</a>, <a href="/search/cs?searchtype=author&query=Huang%2C+S">Sandy Huang</a>, <a href="/search/cs?searchtype=author&query=Sokolov%2C+A">Artem Sokolov</a>, <a href="/search/cs?searchtype=author&query=Barnes%2C+M">Matt Barnes</a>, <a href="/search/cs?searchtype=author&query=Desjardins%2C+G">Guillaume Desjardins</a>, <a href="/search/cs?searchtype=author&query=Bewley%2C+A">Alex Bewley</a>, <a href="/search/cs?searchtype=author&query=Bechtle%2C+S+M+E">Sarah Maria Elisabeth Bechtle</a>, <a href="/search/cs?searchtype=author&query=Springenberg%2C+J+T">Jost Tobias Springenberg</a>, <a href="/search/cs?searchtype=author&query=Momchev%2C+N">Nikola Momchev</a>, <a href="/search/cs?searchtype=author&query=Bachem%2C+O">Olivier Bachem</a>, <a href="/search/cs?searchtype=author&query=Geist%2C+M">Matthieu Geist</a>, <a href="/search/cs?searchtype=author&query=Riedmiller%2C+M">Martin Riedmiller</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2409.01369v2-abstract-short" style="display: inline;"> The majority of language model training builds on imitation learning. It covers pretraining, supervised fine-tuning, and affects the starting conditions for reinforcement learning from human feedback (RLHF). The simplicity and scalability of maximum likelihood estimation (MLE) for next token prediction led to its role as predominant paradigm. However, the broader field of imitation learning can mo… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2409.01369v2-abstract-full').style.display = 'inline'; document.getElementById('2409.01369v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2409.01369v2-abstract-full" style="display: none;"> The majority of language model training builds on imitation learning. It covers pretraining, supervised fine-tuning, and affects the starting conditions for reinforcement learning from human feedback (RLHF). The simplicity and scalability of maximum likelihood estimation (MLE) for next token prediction led to its role as predominant paradigm. However, the broader field of imitation learning can more effectively utilize the sequential structure underlying autoregressive generation. We focus on investigating the inverse reinforcement learning (IRL) perspective to imitation, extracting rewards and directly optimizing sequences instead of individual token likelihoods and evaluate its benefits for fine-tuning large language models. We provide a new angle, reformulating inverse soft-Q-learning as a temporal difference regularized extension of MLE. This creates a principled connection between MLE and IRL and allows trading off added complexity with increased performance and diversity of generations in the supervised fine-tuning (SFT) setting. We find clear advantages for IRL-based imitation, in particular for retaining diversity while maximizing task performance, rendering IRL a strong alternative on fixed SFT datasets even without online data generation. Our analysis of IRL-extracted reward functions further indicates benefits for more robust reward functions via tighter integration of supervised and preference-based LLM post-training. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2409.01369v2-abstract-full').style.display = 'none'; document.getElementById('2409.01369v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 9 December, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 2 September, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> September 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Published at NeurIPS 2024</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2404.10179">arXiv:2404.10179</a> <span> [<a href="https://arxiv.org/pdf/2404.10179">pdf</a>, <a href="https://arxiv.org/format/2404.10179">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Robotics">cs.RO</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Human-Computer Interaction">cs.HC</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> Scaling Instructable Agents Across Many Simulated Worlds </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=SIMA+Team"> SIMA Team</a>, <a href="/search/cs?searchtype=author&query=Raad%2C+M+A">Maria Abi Raad</a>, <a href="/search/cs?searchtype=author&query=Ahuja%2C+A">Arun Ahuja</a>, <a href="/search/cs?searchtype=author&query=Barros%2C+C">Catarina Barros</a>, <a href="/search/cs?searchtype=author&query=Besse%2C+F">Frederic Besse</a>, <a href="/search/cs?searchtype=author&query=Bolt%2C+A">Andrew Bolt</a>, <a href="/search/cs?searchtype=author&query=Bolton%2C+A">Adrian Bolton</a>, <a href="/search/cs?searchtype=author&query=Brownfield%2C+B">Bethanie Brownfield</a>, <a href="/search/cs?searchtype=author&query=Buttimore%2C+G">Gavin Buttimore</a>, <a href="/search/cs?searchtype=author&query=Cant%2C+M">Max Cant</a>, <a href="/search/cs?searchtype=author&query=Chakera%2C+S">Sarah Chakera</a>, <a href="/search/cs?searchtype=author&query=Chan%2C+S+C+Y">Stephanie C. Y. Chan</a>, <a href="/search/cs?searchtype=author&query=Clune%2C+J">Jeff Clune</a>, <a href="/search/cs?searchtype=author&query=Collister%2C+A">Adrian Collister</a>, <a href="/search/cs?searchtype=author&query=Copeman%2C+V">Vikki Copeman</a>, <a href="/search/cs?searchtype=author&query=Cullum%2C+A">Alex Cullum</a>, <a href="/search/cs?searchtype=author&query=Dasgupta%2C+I">Ishita Dasgupta</a>, <a href="/search/cs?searchtype=author&query=de+Cesare%2C+D">Dario de Cesare</a>, <a href="/search/cs?searchtype=author&query=Di+Trapani%2C+J">Julia Di Trapani</a>, <a href="/search/cs?searchtype=author&query=Donchev%2C+Y">Yani Donchev</a>, <a href="/search/cs?searchtype=author&query=Dunleavy%2C+E">Emma Dunleavy</a>, <a href="/search/cs?searchtype=author&query=Engelcke%2C+M">Martin Engelcke</a>, <a href="/search/cs?searchtype=author&query=Faulkner%2C+R">Ryan Faulkner</a>, <a href="/search/cs?searchtype=author&query=Garcia%2C+F">Frankie Garcia</a>, <a href="/search/cs?searchtype=author&query=Gbadamosi%2C+C">Charles Gbadamosi</a> , et al. (69 additional authors not shown) </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2404.10179v3-abstract-short" style="display: inline;"> Building embodied AI systems that can follow arbitrary language instructions in any 3D environment is a key challenge for creating general AI. Accomplishing this goal requires learning to ground language in perception and embodied actions, in order to accomplish complex tasks. The Scalable, Instructable, Multiworld Agent (SIMA) project tackles this by training agents to follow free-form instructio… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2404.10179v3-abstract-full').style.display = 'inline'; document.getElementById('2404.10179v3-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2404.10179v3-abstract-full" style="display: none;"> Building embodied AI systems that can follow arbitrary language instructions in any 3D environment is a key challenge for creating general AI. Accomplishing this goal requires learning to ground language in perception and embodied actions, in order to accomplish complex tasks. The Scalable, Instructable, Multiworld Agent (SIMA) project tackles this by training agents to follow free-form instructions across a diverse range of virtual 3D environments, including curated research environments as well as open-ended, commercial video games. Our goal is to develop an instructable agent that can accomplish anything a human can do in any simulated 3D environment. Our approach focuses on language-driven generality while imposing minimal assumptions. Our agents interact with environments in real-time using a generic, human-like interface: the inputs are image observations and language instructions and the outputs are keyboard-and-mouse actions. This general approach is challenging, but it allows agents to ground language across many visually complex and semantically rich environments while also allowing us to readily run agents in new environments. In this paper we describe our motivation and goal, the initial progress we have made, and promising preliminary results on several diverse research environments and a variety of commercial video games. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2404.10179v3-abstract-full').style.display = 'none'; document.getElementById('2404.10179v3-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 11 October, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 13 March, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> April 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2403.05530">arXiv:2403.05530</a> <span> [<a href="https://arxiv.org/pdf/2403.05530">pdf</a>, <a href="https://arxiv.org/format/2403.05530">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> </div> </div> <p class="title is-5 mathjax"> Gemini 1.5: Unlocking multimodal understanding across millions of tokens of context </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Gemini+Team"> Gemini Team</a>, <a href="/search/cs?searchtype=author&query=Georgiev%2C+P">Petko Georgiev</a>, <a href="/search/cs?searchtype=author&query=Lei%2C+V+I">Ving Ian Lei</a>, <a href="/search/cs?searchtype=author&query=Burnell%2C+R">Ryan Burnell</a>, <a href="/search/cs?searchtype=author&query=Bai%2C+L">Libin Bai</a>, <a href="/search/cs?searchtype=author&query=Gulati%2C+A">Anmol Gulati</a>, <a href="/search/cs?searchtype=author&query=Tanzer%2C+G">Garrett Tanzer</a>, <a href="/search/cs?searchtype=author&query=Vincent%2C+D">Damien Vincent</a>, <a href="/search/cs?searchtype=author&query=Pan%2C+Z">Zhufeng Pan</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+S">Shibo Wang</a>, <a href="/search/cs?searchtype=author&query=Mariooryad%2C+S">Soroosh Mariooryad</a>, <a href="/search/cs?searchtype=author&query=Ding%2C+Y">Yifan Ding</a>, <a href="/search/cs?searchtype=author&query=Geng%2C+X">Xinyang Geng</a>, <a href="/search/cs?searchtype=author&query=Alcober%2C+F">Fred Alcober</a>, <a href="/search/cs?searchtype=author&query=Frostig%2C+R">Roy Frostig</a>, <a href="/search/cs?searchtype=author&query=Omernick%2C+M">Mark Omernick</a>, <a href="/search/cs?searchtype=author&query=Walker%2C+L">Lexi Walker</a>, <a href="/search/cs?searchtype=author&query=Paduraru%2C+C">Cosmin Paduraru</a>, <a href="/search/cs?searchtype=author&query=Sorokin%2C+C">Christina Sorokin</a>, <a href="/search/cs?searchtype=author&query=Tacchetti%2C+A">Andrea Tacchetti</a>, <a href="/search/cs?searchtype=author&query=Gaffney%2C+C">Colin Gaffney</a>, <a href="/search/cs?searchtype=author&query=Daruki%2C+S">Samira Daruki</a>, <a href="/search/cs?searchtype=author&query=Sercinoglu%2C+O">Olcan Sercinoglu</a>, <a href="/search/cs?searchtype=author&query=Gleicher%2C+Z">Zach Gleicher</a>, <a href="/search/cs?searchtype=author&query=Love%2C+J">Juliette Love</a> , et al. (1112 additional authors not shown) </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2403.05530v5-abstract-short" style="display: inline;"> In this report, we introduce the Gemini 1.5 family of models, representing the next generation of highly compute-efficient multimodal models capable of recalling and reasoning over fine-grained information from millions of tokens of context, including multiple long documents and hours of video and audio. The family includes two new models: (1) an updated Gemini 1.5 Pro, which exceeds the February… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2403.05530v5-abstract-full').style.display = 'inline'; document.getElementById('2403.05530v5-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2403.05530v5-abstract-full" style="display: none;"> In this report, we introduce the Gemini 1.5 family of models, representing the next generation of highly compute-efficient multimodal models capable of recalling and reasoning over fine-grained information from millions of tokens of context, including multiple long documents and hours of video and audio. The family includes two new models: (1) an updated Gemini 1.5 Pro, which exceeds the February version on the great majority of capabilities and benchmarks; (2) Gemini 1.5 Flash, a more lightweight variant designed for efficiency with minimal regression in quality. Gemini 1.5 models achieve near-perfect recall on long-context retrieval tasks across modalities, improve the state-of-the-art in long-document QA, long-video QA and long-context ASR, and match or surpass Gemini 1.0 Ultra's state-of-the-art performance across a broad set of benchmarks. Studying the limits of Gemini 1.5's long-context ability, we find continued improvement in next-token prediction and near-perfect retrieval (>99%) up to at least 10M tokens, a generational leap over existing models such as Claude 3.0 (200k) and GPT-4 Turbo (128k). Finally, we highlight real-world use cases, such as Gemini 1.5 collaborating with professionals on completing their tasks achieving 26 to 75% time savings across 10 different job categories, as well as surprising new capabilities of large language models at the frontier; when given a grammar manual for Kalamang, a language with fewer than 200 speakers worldwide, the model learns to translate English to Kalamang at a similar level to a person who learned from the same content. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2403.05530v5-abstract-full').style.display = 'none'; document.getElementById('2403.05530v5-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 16 December, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 8 March, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> March 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2312.11805">arXiv:2312.11805</a> <span> [<a href="https://arxiv.org/pdf/2312.11805">pdf</a>, <a href="https://arxiv.org/format/2312.11805">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> Gemini: A Family of Highly Capable Multimodal Models </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Gemini+Team"> Gemini Team</a>, <a href="/search/cs?searchtype=author&query=Anil%2C+R">Rohan Anil</a>, <a href="/search/cs?searchtype=author&query=Borgeaud%2C+S">Sebastian Borgeaud</a>, <a href="/search/cs?searchtype=author&query=Alayrac%2C+J">Jean-Baptiste Alayrac</a>, <a href="/search/cs?searchtype=author&query=Yu%2C+J">Jiahui Yu</a>, <a href="/search/cs?searchtype=author&query=Soricut%2C+R">Radu Soricut</a>, <a href="/search/cs?searchtype=author&query=Schalkwyk%2C+J">Johan Schalkwyk</a>, <a href="/search/cs?searchtype=author&query=Dai%2C+A+M">Andrew M. Dai</a>, <a href="/search/cs?searchtype=author&query=Hauth%2C+A">Anja Hauth</a>, <a href="/search/cs?searchtype=author&query=Millican%2C+K">Katie Millican</a>, <a href="/search/cs?searchtype=author&query=Silver%2C+D">David Silver</a>, <a href="/search/cs?searchtype=author&query=Johnson%2C+M">Melvin Johnson</a>, <a href="/search/cs?searchtype=author&query=Antonoglou%2C+I">Ioannis Antonoglou</a>, <a href="/search/cs?searchtype=author&query=Schrittwieser%2C+J">Julian Schrittwieser</a>, <a href="/search/cs?searchtype=author&query=Glaese%2C+A">Amelia Glaese</a>, <a href="/search/cs?searchtype=author&query=Chen%2C+J">Jilin Chen</a>, <a href="/search/cs?searchtype=author&query=Pitler%2C+E">Emily Pitler</a>, <a href="/search/cs?searchtype=author&query=Lillicrap%2C+T">Timothy Lillicrap</a>, <a href="/search/cs?searchtype=author&query=Lazaridou%2C+A">Angeliki Lazaridou</a>, <a href="/search/cs?searchtype=author&query=Firat%2C+O">Orhan Firat</a>, <a href="/search/cs?searchtype=author&query=Molloy%2C+J">James Molloy</a>, <a href="/search/cs?searchtype=author&query=Isard%2C+M">Michael Isard</a>, <a href="/search/cs?searchtype=author&query=Barham%2C+P+R">Paul R. Barham</a>, <a href="/search/cs?searchtype=author&query=Hennigan%2C+T">Tom Hennigan</a>, <a href="/search/cs?searchtype=author&query=Lee%2C+B">Benjamin Lee</a> , et al. (1325 additional authors not shown) </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2312.11805v4-abstract-short" style="display: inline;"> This report introduces a new family of multimodal models, Gemini, that exhibit remarkable capabilities across image, audio, video, and text understanding. The Gemini family consists of Ultra, Pro, and Nano sizes, suitable for applications ranging from complex reasoning tasks to on-device memory-constrained use-cases. Evaluation on a broad range of benchmarks shows that our most-capable Gemini Ultr… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2312.11805v4-abstract-full').style.display = 'inline'; document.getElementById('2312.11805v4-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2312.11805v4-abstract-full" style="display: none;"> This report introduces a new family of multimodal models, Gemini, that exhibit remarkable capabilities across image, audio, video, and text understanding. The Gemini family consists of Ultra, Pro, and Nano sizes, suitable for applications ranging from complex reasoning tasks to on-device memory-constrained use-cases. Evaluation on a broad range of benchmarks shows that our most-capable Gemini Ultra model advances the state of the art in 30 of 32 of these benchmarks - notably being the first model to achieve human-expert performance on the well-studied exam benchmark MMLU, and improving the state of the art in every one of the 20 multimodal benchmarks we examined. We believe that the new capabilities of the Gemini family in cross-modal reasoning and language understanding will enable a wide variety of use cases. We discuss our approach toward post-training and deploying Gemini models responsibly to users through services including Gemini, Gemini Advanced, Google AI Studio, and Cloud Vertex AI. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2312.11805v4-abstract-full').style.display = 'none'; document.getElementById('2312.11805v4-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 17 June, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 18 December, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> December 2023. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2312.07199">arXiv:2312.07199</a> <span> [<a href="https://arxiv.org/pdf/2312.07199">pdf</a>, <a href="https://arxiv.org/format/2312.07199">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> SeasFire as a Multivariate Earth System Datacube for Wildfire Dynamics </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Karasante%2C+I">Ilektra Karasante</a>, <a href="/search/cs?searchtype=author&query=Alonso%2C+L">Lazaro Alonso</a>, <a href="/search/cs?searchtype=author&query=Prapas%2C+I">Ioannis Prapas</a>, <a href="/search/cs?searchtype=author&query=Ahuja%2C+A">Akanksha Ahuja</a>, <a href="/search/cs?searchtype=author&query=Carvalhais%2C+N">Nuno Carvalhais</a>, <a href="/search/cs?searchtype=author&query=Papoutsis%2C+I">Ioannis Papoutsis</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2312.07199v2-abstract-short" style="display: inline;"> The global occurrence, scale, and frequency of wildfires pose significant threats to ecosystem services and human livelihoods. To effectively quantify and attribute the antecedent conditions for wildfires, a thorough understanding of Earth system dynamics is imperative. In response, we introduce the SeasFire datacube, a meticulously curated spatiotemporal dataset tailored for global sub-seasonal t… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2312.07199v2-abstract-full').style.display = 'inline'; document.getElementById('2312.07199v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2312.07199v2-abstract-full" style="display: none;"> The global occurrence, scale, and frequency of wildfires pose significant threats to ecosystem services and human livelihoods. To effectively quantify and attribute the antecedent conditions for wildfires, a thorough understanding of Earth system dynamics is imperative. In response, we introduce the SeasFire datacube, a meticulously curated spatiotemporal dataset tailored for global sub-seasonal to seasonal wildfire modeling via Earth observation. The SeasFire datacube comprises of 59 variables encompassing climate, vegetation, oceanic indices, and human factors, has an 8-day temporal resolution and a spatial resolution of 0.25$^{\circ}$, and spans from 2001 to 2021. We showcase the versatility of SeasFire for exploring the variability and seasonality of wildfire drivers, modeling causal links between ocean-climate teleconnections and wildfires, and predicting sub-seasonal wildfire patterns across multiple timescales with a Deep Learning model. We publicly release the SeasFire datacube and appeal to Earth system scientists and Machine Learning practitioners to use it for an improved understanding and anticipation of wildfires. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2312.07199v2-abstract-full').style.display = 'none'; document.getElementById('2312.07199v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 22 December, 2023; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 12 December, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> December 2023. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">20 pages, 9 figures, and 5 tables. Typos corrected</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2309.11564">arXiv:2309.11564</a> <span> [<a href="https://arxiv.org/pdf/2309.11564">pdf</a>, <a href="https://arxiv.org/format/2309.11564">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> </div> </div> <p class="title is-5 mathjax"> Hierarchical reinforcement learning with natural language subgoals </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Ahuja%2C+A">Arun Ahuja</a>, <a href="/search/cs?searchtype=author&query=Kopparapu%2C+K">Kavya Kopparapu</a>, <a href="/search/cs?searchtype=author&query=Fergus%2C+R">Rob Fergus</a>, <a href="/search/cs?searchtype=author&query=Dasgupta%2C+I">Ishita Dasgupta</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2309.11564v1-abstract-short" style="display: inline;"> Hierarchical reinforcement learning has been a compelling approach for achieving goal directed behavior over long sequences of actions. However, it has been challenging to implement in realistic or open-ended environments. A main challenge has been to find the right space of sub-goals over which to instantiate a hierarchy. We present a novel approach where we use data from humans solving these tas… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2309.11564v1-abstract-full').style.display = 'inline'; document.getElementById('2309.11564v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2309.11564v1-abstract-full" style="display: none;"> Hierarchical reinforcement learning has been a compelling approach for achieving goal directed behavior over long sequences of actions. However, it has been challenging to implement in realistic or open-ended environments. A main challenge has been to find the right space of sub-goals over which to instantiate a hierarchy. We present a novel approach where we use data from humans solving these tasks to softly supervise the goal space for a set of long range tasks in a 3D embodied environment. In particular, we use unconstrained natural language to parameterize this space. This has two advantages: first, it is easy to generate this data from naive human participants; second, it is flexible enough to represent a vast range of sub-goals in human-relevant tasks. Our approach outperforms agents that clone expert behavior on these tasks, as well as HRL from scratch without this supervised sub-goal space. Our work presents a novel approach to combining human expert supervision with the benefits and flexibility of reinforcement learning. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2309.11564v1-abstract-full').style.display = 'none'; document.getElementById('2309.11564v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 20 September, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> September 2023. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2306.11582">arXiv:2306.11582</a> <span> [<a href="https://arxiv.org/pdf/2306.11582">pdf</a>, <a href="https://arxiv.org/format/2306.11582">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> </div> </div> <p class="title is-5 mathjax"> Computing a human-like reaction time metric from stable recurrent vision models </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Goetschalckx%2C+L">Lore Goetschalckx</a>, <a href="/search/cs?searchtype=author&query=Govindarajan%2C+L+N">Lakshmi Narasimhan Govindarajan</a>, <a href="/search/cs?searchtype=author&query=Ashok%2C+A+K">Alekh Karkada Ashok</a>, <a href="/search/cs?searchtype=author&query=Ahuja%2C+A">Aarit Ahuja</a>, <a href="/search/cs?searchtype=author&query=Sheinberg%2C+D+L">David L. Sheinberg</a>, <a href="/search/cs?searchtype=author&query=Serre%2C+T">Thomas Serre</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2306.11582v2-abstract-short" style="display: inline;"> The meteoric rise in the adoption of deep neural networks as computational models of vision has inspired efforts to "align" these models with humans. One dimension of interest for alignment includes behavioral choices, but moving beyond characterizing choice patterns to capturing temporal aspects of visual decision-making has been challenging. Here, we sketch a general-purpose methodology to const… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2306.11582v2-abstract-full').style.display = 'inline'; document.getElementById('2306.11582v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2306.11582v2-abstract-full" style="display: none;"> The meteoric rise in the adoption of deep neural networks as computational models of vision has inspired efforts to "align" these models with humans. One dimension of interest for alignment includes behavioral choices, but moving beyond characterizing choice patterns to capturing temporal aspects of visual decision-making has been challenging. Here, we sketch a general-purpose methodology to construct computational accounts of reaction times from a stimulus-computable, task-optimized model. Specifically, we introduce a novel metric leveraging insights from subjective logic theory summarizing evidence accumulation in recurrent vision models. We demonstrate that our metric aligns with patterns of human reaction times for stimulus manipulations across four disparate visual decision-making tasks spanning perceptual grouping, mental simulation, and scene categorization. This work paves the way for exploring the temporal alignment of model and human visual strategies in the context of various other cognitive tasks toward generating testable hypotheses for neuroscience. Links to the code and data can be found on the project page: https://serre-lab.github.io/rnn_rts_site. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2306.11582v2-abstract-full').style.display = 'none'; document.getElementById('2306.11582v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 6 November, 2023; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 20 June, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> June 2023. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Published at NeurIPS 2023</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2302.04264">arXiv:2302.04264</a> <span> [<a href="https://arxiv.org/pdf/2302.04264">pdf</a>, <a href="https://arxiv.org/format/2302.04264">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Graphics">cs.GR</span> </div> <div class="is-inline-block" style="margin-left: 0.5rem"> <div class="tags has-addons"> <span class="tag is-dark is-size-7">doi</span> <span class="tag is-light is-size-7"><a class="" href="https://doi.org/10.1145/3588432.3591516">10.1145/3588432.3591516 <i class="fa fa-external-link" aria-hidden="true"></i></a></span> </div> </div> </div> <p class="title is-5 mathjax"> Nerfstudio: A Modular Framework for Neural Radiance Field Development </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Tancik%2C+M">Matthew Tancik</a>, <a href="/search/cs?searchtype=author&query=Weber%2C+E">Ethan Weber</a>, <a href="/search/cs?searchtype=author&query=Ng%2C+E">Evonne Ng</a>, <a href="/search/cs?searchtype=author&query=Li%2C+R">Ruilong Li</a>, <a href="/search/cs?searchtype=author&query=Yi%2C+B">Brent Yi</a>, <a href="/search/cs?searchtype=author&query=Kerr%2C+J">Justin Kerr</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+T">Terrance Wang</a>, <a href="/search/cs?searchtype=author&query=Kristoffersen%2C+A">Alexander Kristoffersen</a>, <a href="/search/cs?searchtype=author&query=Austin%2C+J">Jake Austin</a>, <a href="/search/cs?searchtype=author&query=Salahi%2C+K">Kamyar Salahi</a>, <a href="/search/cs?searchtype=author&query=Ahuja%2C+A">Abhik Ahuja</a>, <a href="/search/cs?searchtype=author&query=McAllister%2C+D">David McAllister</a>, <a href="/search/cs?searchtype=author&query=Kanazawa%2C+A">Angjoo Kanazawa</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2302.04264v4-abstract-short" style="display: inline;"> Neural Radiance Fields (NeRF) are a rapidly growing area of research with wide-ranging applications in computer vision, graphics, robotics, and more. In order to streamline the development and deployment of NeRF research, we propose a modular PyTorch framework, Nerfstudio. Our framework includes plug-and-play components for implementing NeRF-based methods, which make it easy for researchers and pr… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2302.04264v4-abstract-full').style.display = 'inline'; document.getElementById('2302.04264v4-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2302.04264v4-abstract-full" style="display: none;"> Neural Radiance Fields (NeRF) are a rapidly growing area of research with wide-ranging applications in computer vision, graphics, robotics, and more. In order to streamline the development and deployment of NeRF research, we propose a modular PyTorch framework, Nerfstudio. Our framework includes plug-and-play components for implementing NeRF-based methods, which make it easy for researchers and practitioners to incorporate NeRF into their projects. Additionally, the modular design enables support for extensive real-time visualization tools, streamlined pipelines for importing captured in-the-wild data, and tools for exporting to video, point cloud and mesh representations. The modularity of Nerfstudio enables the development of Nerfacto, our method that combines components from recent papers to achieve a balance between speed and quality, while also remaining flexible to future modifications. To promote community-driven development, all associated code and data are made publicly available with open-source licensing at https://nerf.studio. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2302.04264v4-abstract-full').style.display = 'none'; document.getElementById('2302.04264v4-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 16 October, 2023; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 8 February, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2023. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Project page at https://nerf.studio</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2302.00763">arXiv:2302.00763</a> <span> [<a href="https://arxiv.org/pdf/2302.00763">pdf</a>, <a href="https://arxiv.org/format/2302.00763">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> </div> </div> <p class="title is-5 mathjax"> Collaborating with language models for embodied reasoning </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Dasgupta%2C+I">Ishita Dasgupta</a>, <a href="/search/cs?searchtype=author&query=Kaeser-Chen%2C+C">Christine Kaeser-Chen</a>, <a href="/search/cs?searchtype=author&query=Marino%2C+K">Kenneth Marino</a>, <a href="/search/cs?searchtype=author&query=Ahuja%2C+A">Arun Ahuja</a>, <a href="/search/cs?searchtype=author&query=Babayan%2C+S">Sheila Babayan</a>, <a href="/search/cs?searchtype=author&query=Hill%2C+F">Felix Hill</a>, <a href="/search/cs?searchtype=author&query=Fergus%2C+R">Rob Fergus</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2302.00763v1-abstract-short" style="display: inline;"> Reasoning in a complex and ambiguous environment is a key goal for Reinforcement Learning (RL) agents. While some sophisticated RL agents can successfully solve difficult tasks, they require a large amount of training data and often struggle to generalize to new unseen environments and new tasks. On the other hand, Large Scale Language Models (LSLMs) have exhibited strong reasoning ability and the… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2302.00763v1-abstract-full').style.display = 'inline'; document.getElementById('2302.00763v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2302.00763v1-abstract-full" style="display: none;"> Reasoning in a complex and ambiguous environment is a key goal for Reinforcement Learning (RL) agents. While some sophisticated RL agents can successfully solve difficult tasks, they require a large amount of training data and often struggle to generalize to new unseen environments and new tasks. On the other hand, Large Scale Language Models (LSLMs) have exhibited strong reasoning ability and the ability to to adapt to new tasks through in-context learning. However, LSLMs do not inherently have the ability to interrogate or intervene on the environment. In this work, we investigate how to combine these complementary abilities in a single system consisting of three parts: a Planner, an Actor, and a Reporter. The Planner is a pre-trained language model that can issue commands to a simple embodied agent (the Actor), while the Reporter communicates with the Planner to inform its next command. We present a set of tasks that require reasoning, test this system's ability to generalize zero-shot and investigate failure cases, and demonstrate how components of this system can be trained with reinforcement-learning to improve performance. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2302.00763v1-abstract-full').style.display = 'none'; document.getElementById('2302.00763v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 1 February, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2023. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Presented at NeurIPS 2022 Language and Reinforcement Learning Workshop (best paper) and NeurIPS 2022 Foundation Models for Decision Making Workshop. 4 pages main; 14 pages total (including references and appendix); 3 figures</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2301.12507">arXiv:2301.12507</a> <span> [<a href="https://arxiv.org/pdf/2301.12507">pdf</a>, <a href="https://arxiv.org/format/2301.12507">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> </div> </div> <p class="title is-5 mathjax"> Distilling Internet-Scale Vision-Language Models into Embodied Agents </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Sumers%2C+T">Theodore Sumers</a>, <a href="/search/cs?searchtype=author&query=Marino%2C+K">Kenneth Marino</a>, <a href="/search/cs?searchtype=author&query=Ahuja%2C+A">Arun Ahuja</a>, <a href="/search/cs?searchtype=author&query=Fergus%2C+R">Rob Fergus</a>, <a href="/search/cs?searchtype=author&query=Dasgupta%2C+I">Ishita Dasgupta</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2301.12507v2-abstract-short" style="display: inline;"> Instruction-following agents must ground language into their observation and action spaces. Learning to ground language is challenging, typically requiring domain-specific engineering or large quantities of human interaction data. To address this challenge, we propose using pretrained vision-language models (VLMs) to supervise embodied agents. We combine ideas from model distillation and hindsight… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2301.12507v2-abstract-full').style.display = 'inline'; document.getElementById('2301.12507v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2301.12507v2-abstract-full" style="display: none;"> Instruction-following agents must ground language into their observation and action spaces. Learning to ground language is challenging, typically requiring domain-specific engineering or large quantities of human interaction data. To address this challenge, we propose using pretrained vision-language models (VLMs) to supervise embodied agents. We combine ideas from model distillation and hindsight experience replay (HER), using a VLM to retroactively generate language describing the agent's behavior. Simple prompting allows us to control the supervision signal, teaching an agent to interact with novel objects based on their names (e.g., planes) or their features (e.g., colors) in a 3D rendered environment. Fewshot prompting lets us teach abstract category membership, including pre-existing categories (food vs toys) and ad-hoc ones (arbitrary preferences over objects). Our work outlines a new and effective way to use internet-scale VLMs, repurposing the generic language grounding acquired by such models to teach task-relevant groundings to embodied agents. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2301.12507v2-abstract-full').style.display = 'none'; document.getElementById('2301.12507v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 14 June, 2023; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 29 January, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> January 2023. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">9 pages, 7 figures. Presented at ICML 2023</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2211.11602">arXiv:2211.11602</a> <span> [<a href="https://arxiv.org/pdf/2211.11602">pdf</a>, <a href="https://arxiv.org/format/2211.11602">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Human-Computer Interaction">cs.HC</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Multiagent Systems">cs.MA</span> </div> </div> <p class="title is-5 mathjax"> Improving Multimodal Interactive Agents with Reinforcement Learning from Human Feedback </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Abramson%2C+J">Josh Abramson</a>, <a href="/search/cs?searchtype=author&query=Ahuja%2C+A">Arun Ahuja</a>, <a href="/search/cs?searchtype=author&query=Carnevale%2C+F">Federico Carnevale</a>, <a href="/search/cs?searchtype=author&query=Georgiev%2C+P">Petko Georgiev</a>, <a href="/search/cs?searchtype=author&query=Goldin%2C+A">Alex Goldin</a>, <a href="/search/cs?searchtype=author&query=Hung%2C+A">Alden Hung</a>, <a href="/search/cs?searchtype=author&query=Landon%2C+J">Jessica Landon</a>, <a href="/search/cs?searchtype=author&query=Lhotka%2C+J">Jirka Lhotka</a>, <a href="/search/cs?searchtype=author&query=Lillicrap%2C+T">Timothy Lillicrap</a>, <a href="/search/cs?searchtype=author&query=Muldal%2C+A">Alistair Muldal</a>, <a href="/search/cs?searchtype=author&query=Powell%2C+G">George Powell</a>, <a href="/search/cs?searchtype=author&query=Santoro%2C+A">Adam Santoro</a>, <a href="/search/cs?searchtype=author&query=Scully%2C+G">Guy Scully</a>, <a href="/search/cs?searchtype=author&query=Srivastava%2C+S">Sanjana Srivastava</a>, <a href="/search/cs?searchtype=author&query=von+Glehn%2C+T">Tamara von Glehn</a>, <a href="/search/cs?searchtype=author&query=Wayne%2C+G">Greg Wayne</a>, <a href="/search/cs?searchtype=author&query=Wong%2C+N">Nathaniel Wong</a>, <a href="/search/cs?searchtype=author&query=Yan%2C+C">Chen Yan</a>, <a href="/search/cs?searchtype=author&query=Zhu%2C+R">Rui Zhu</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2211.11602v1-abstract-short" style="display: inline;"> An important goal in artificial intelligence is to create agents that can both interact naturally with humans and learn from their feedback. Here we demonstrate how to use reinforcement learning from human feedback (RLHF) to improve upon simulated, embodied agents trained to a base level of competency with imitation learning. First, we collected data of humans interacting with agents in a simulate… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2211.11602v1-abstract-full').style.display = 'inline'; document.getElementById('2211.11602v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2211.11602v1-abstract-full" style="display: none;"> An important goal in artificial intelligence is to create agents that can both interact naturally with humans and learn from their feedback. Here we demonstrate how to use reinforcement learning from human feedback (RLHF) to improve upon simulated, embodied agents trained to a base level of competency with imitation learning. First, we collected data of humans interacting with agents in a simulated 3D world. We then asked annotators to record moments where they believed that agents either progressed toward or regressed from their human-instructed goal. Using this annotation data we leveraged a novel method - which we call "Inter-temporal Bradley-Terry" (IBT) modelling - to build a reward model that captures human judgments. Agents trained to optimise rewards delivered from IBT reward models improved with respect to all of our metrics, including subsequent human judgment during live interactions with agents. Altogether our results demonstrate how one can successfully leverage human judgments to improve agent behaviour, allowing us to use reinforcement learning in complex, embodied domains without programmatic reward functions. Videos of agent behaviour may be found at https://youtu.be/v_Z9F2_eKk4. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2211.11602v1-abstract-full').style.display = 'none'; document.getElementById('2211.11602v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 21 November, 2022; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> November 2022. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2211.02131">arXiv:2211.02131</a> <span> [<a href="https://arxiv.org/pdf/2211.02131">pdf</a>, <a href="https://arxiv.org/format/2211.02131">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Robotics">cs.RO</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> Safe Real-World Autonomous Driving by Learning to Predict and Plan with a Mixture of Experts </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Pini%2C+S">Stefano Pini</a>, <a href="/search/cs?searchtype=author&query=Perone%2C+C+S">Christian S. Perone</a>, <a href="/search/cs?searchtype=author&query=Ahuja%2C+A">Aayush Ahuja</a>, <a href="/search/cs?searchtype=author&query=Ferreira%2C+A+S+R">Ana Sofia Rufino Ferreira</a>, <a href="/search/cs?searchtype=author&query=Niendorf%2C+M">Moritz Niendorf</a>, <a href="/search/cs?searchtype=author&query=Zagoruyko%2C+S">Sergey Zagoruyko</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2211.02131v1-abstract-short" style="display: inline;"> The goal of autonomous vehicles is to navigate public roads safely and comfortably. To enforce safety, traditional planning approaches rely on handcrafted rules to generate trajectories. Machine learning-based systems, on the other hand, scale with data and are able to learn more complex behaviors. However, they often ignore that agents and self-driving vehicle trajectory distributions can be leve… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2211.02131v1-abstract-full').style.display = 'inline'; document.getElementById('2211.02131v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2211.02131v1-abstract-full" style="display: none;"> The goal of autonomous vehicles is to navigate public roads safely and comfortably. To enforce safety, traditional planning approaches rely on handcrafted rules to generate trajectories. Machine learning-based systems, on the other hand, scale with data and are able to learn more complex behaviors. However, they often ignore that agents and self-driving vehicle trajectory distributions can be leveraged to improve safety. In this paper, we propose modeling a distribution over multiple future trajectories for both the self-driving vehicle and other road agents, using a unified neural network architecture for prediction and planning. During inference, we select the planning trajectory that minimizes a cost taking into account safety and the predicted probabilities. Our approach does not depend on any rule-based planners for trajectory generation or optimization, improves with more training data and is simple to implement. We extensively evaluate our method through a realistic simulator and show that the predicted trajectory distribution corresponds to different driving profiles. We also successfully deploy it on a self-driving vehicle on urban public roads, confirming that it drives safely without compromising comfort. The code for training and testing our model on a public prediction dataset and the video of the road test are available at https://woven.mobi/safepathnet <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2211.02131v1-abstract-full').style.display = 'none'; document.getElementById('2211.02131v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 3 November, 2022; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> November 2022. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2211.00534">arXiv:2211.00534</a> <span> [<a href="https://arxiv.org/pdf/2211.00534">pdf</a>, <a href="https://arxiv.org/format/2211.00534">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> Deep Learning for Global Wildfire Forecasting </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Prapas%2C+I">Ioannis Prapas</a>, <a href="/search/cs?searchtype=author&query=Ahuja%2C+A">Akanksha Ahuja</a>, <a href="/search/cs?searchtype=author&query=Kondylatos%2C+S">Spyros Kondylatos</a>, <a href="/search/cs?searchtype=author&query=Karasante%2C+I">Ilektra Karasante</a>, <a href="/search/cs?searchtype=author&query=Panagiotou%2C+E">Eleanna Panagiotou</a>, <a href="/search/cs?searchtype=author&query=Alonso%2C+L">Lazaro Alonso</a>, <a href="/search/cs?searchtype=author&query=Davalas%2C+C">Charalampos Davalas</a>, <a href="/search/cs?searchtype=author&query=Michail%2C+D">Dimitrios Michail</a>, <a href="/search/cs?searchtype=author&query=Carvalhais%2C+N">Nuno Carvalhais</a>, <a href="/search/cs?searchtype=author&query=Papoutsis%2C+I">Ioannis Papoutsis</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2211.00534v3-abstract-short" style="display: inline;"> Climate change is expected to aggravate wildfire activity through the exacerbation of fire weather. Improving our capabilities to anticipate wildfires on a global scale is of uttermost importance for mitigating their negative effects. In this work, we create a global fire dataset and demonstrate a prototype for predicting the presence of global burned areas on a sub-seasonal scale with the use of… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2211.00534v3-abstract-full').style.display = 'inline'; document.getElementById('2211.00534v3-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2211.00534v3-abstract-full" style="display: none;"> Climate change is expected to aggravate wildfire activity through the exacerbation of fire weather. Improving our capabilities to anticipate wildfires on a global scale is of uttermost importance for mitigating their negative effects. In this work, we create a global fire dataset and demonstrate a prototype for predicting the presence of global burned areas on a sub-seasonal scale with the use of segmentation deep learning models. Particularly, we present an open-access global analysis-ready datacube, which contains a variety of variables related to the seasonal and sub-seasonal fire drivers (climate, vegetation, oceanic indices, human-related variables), as well as the historical burned areas and wildfire emissions for 2001-2021. We train a deep learning model, which treats global wildfire forecasting as an image segmentation task and skillfully predicts the presence of burned areas 8, 16, 32 and 64 days ahead of time. Our work motivates the use of deep learning for global burned area forecasting and paves the way towards improved anticipation of global wildfire patterns. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2211.00534v3-abstract-full').style.display = 'none'; document.getElementById('2211.00534v3-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 16 October, 2023; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 1 November, 2022; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> November 2022. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Accepted at the NeurIPS 2022 workshop on Tackling Climate Change with Machine Learning. Version 2 has corrected the table of results (Table 1)</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2211.00177">arXiv:2211.00177</a> <span> [<a href="https://arxiv.org/pdf/2211.00177">pdf</a>, <a href="https://arxiv.org/format/2211.00177">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Information Retrieval">cs.IR</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Social and Information Networks">cs.SI</span> </div> </div> <p class="title is-5 mathjax"> Learning to Navigate Wikipedia by Taking Random Walks </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Zaheer%2C+M">Manzil Zaheer</a>, <a href="/search/cs?searchtype=author&query=Marino%2C+K">Kenneth Marino</a>, <a href="/search/cs?searchtype=author&query=Grathwohl%2C+W">Will Grathwohl</a>, <a href="/search/cs?searchtype=author&query=Schultz%2C+J">John Schultz</a>, <a href="/search/cs?searchtype=author&query=Shang%2C+W">Wendy Shang</a>, <a href="/search/cs?searchtype=author&query=Babayan%2C+S">Sheila Babayan</a>, <a href="/search/cs?searchtype=author&query=Ahuja%2C+A">Arun Ahuja</a>, <a href="/search/cs?searchtype=author&query=Dasgupta%2C+I">Ishita Dasgupta</a>, <a href="/search/cs?searchtype=author&query=Kaeser-Chen%2C+C">Christine Kaeser-Chen</a>, <a href="/search/cs?searchtype=author&query=Fergus%2C+R">Rob Fergus</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2211.00177v1-abstract-short" style="display: inline;"> A fundamental ability of an intelligent web-based agent is seeking out and acquiring new information. Internet search engines reliably find the correct vicinity but the top results may be a few links away from the desired target. A complementary approach is navigation via hyperlinks, employing a policy that comprehends local content and selects a link that moves it closer to the target. In this pa… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2211.00177v1-abstract-full').style.display = 'inline'; document.getElementById('2211.00177v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2211.00177v1-abstract-full" style="display: none;"> A fundamental ability of an intelligent web-based agent is seeking out and acquiring new information. Internet search engines reliably find the correct vicinity but the top results may be a few links away from the desired target. A complementary approach is navigation via hyperlinks, employing a policy that comprehends local content and selects a link that moves it closer to the target. In this paper, we show that behavioral cloning of randomly sampled trajectories is sufficient to learn an effective link selection policy. We demonstrate the approach on a graph version of Wikipedia with 38M nodes and 387M edges. The model is able to efficiently navigate between nodes 5 and 20 steps apart 96% and 92% of the time, respectively. We then use the resulting embeddings and policy in downstream fact verification and question answering tasks where, in combination with basic TF-IDF search and ranking methods, they are competitive results to the state-of-the-art methods. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2211.00177v1-abstract-full').style.display = 'none'; document.getElementById('2211.00177v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 31 October, 2022; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> November 2022. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Journal ref:</span> NeurIPS 2022 </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2205.13274">arXiv:2205.13274</a> <span> [<a href="https://arxiv.org/pdf/2205.13274">pdf</a>, <a href="https://arxiv.org/format/2205.13274">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> </div> </div> <p class="title is-5 mathjax"> Evaluating Multimodal Interactive Agents </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Abramson%2C+J">Josh Abramson</a>, <a href="/search/cs?searchtype=author&query=Ahuja%2C+A">Arun Ahuja</a>, <a href="/search/cs?searchtype=author&query=Carnevale%2C+F">Federico Carnevale</a>, <a href="/search/cs?searchtype=author&query=Georgiev%2C+P">Petko Georgiev</a>, <a href="/search/cs?searchtype=author&query=Goldin%2C+A">Alex Goldin</a>, <a href="/search/cs?searchtype=author&query=Hung%2C+A">Alden Hung</a>, <a href="/search/cs?searchtype=author&query=Landon%2C+J">Jessica Landon</a>, <a href="/search/cs?searchtype=author&query=Lillicrap%2C+T">Timothy Lillicrap</a>, <a href="/search/cs?searchtype=author&query=Muldal%2C+A">Alistair Muldal</a>, <a href="/search/cs?searchtype=author&query=Richards%2C+B">Blake Richards</a>, <a href="/search/cs?searchtype=author&query=Santoro%2C+A">Adam Santoro</a>, <a href="/search/cs?searchtype=author&query=von+Glehn%2C+T">Tamara von Glehn</a>, <a href="/search/cs?searchtype=author&query=Wayne%2C+G">Greg Wayne</a>, <a href="/search/cs?searchtype=author&query=Wong%2C+N">Nathaniel Wong</a>, <a href="/search/cs?searchtype=author&query=Yan%2C+C">Chen Yan</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2205.13274v2-abstract-short" style="display: inline;"> Creating agents that can interact naturally with humans is a common goal in artificial intelligence (AI) research. However, evaluating these interactions is challenging: collecting online human-agent interactions is slow and expensive, yet faster proxy metrics often do not correlate well with interactive evaluation. In this paper, we assess the merits of these existing evaluation metrics and prese… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2205.13274v2-abstract-full').style.display = 'inline'; document.getElementById('2205.13274v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2205.13274v2-abstract-full" style="display: none;"> Creating agents that can interact naturally with humans is a common goal in artificial intelligence (AI) research. However, evaluating these interactions is challenging: collecting online human-agent interactions is slow and expensive, yet faster proxy metrics often do not correlate well with interactive evaluation. In this paper, we assess the merits of these existing evaluation metrics and present a novel approach to evaluation called the Standardised Test Suite (STS). The STS uses behavioural scenarios mined from real human interaction data. Agents see replayed scenario context, receive an instruction, and are then given control to complete the interaction offline. These agent continuations are recorded and sent to human annotators to mark as success or failure, and agents are ranked according to the proportion of continuations in which they succeed. The resulting STS is fast, controlled, interpretable, and representative of naturalistic interactions. Altogether, the STS consolidates much of what is desirable across many of our standard evaluation metrics, allowing us to accelerate research progress towards producing agents that can interact naturally with humans. A video may be found at https://youtu.be/YR1TngGORGQ. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2205.13274v2-abstract-full').style.display = 'none'; document.getElementById('2205.13274v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 14 July, 2022; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 26 May, 2022; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> May 2022. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2205.00672">arXiv:2205.00672</a> <span> [<a href="https://arxiv.org/pdf/2205.00672">pdf</a>, <a href="https://arxiv.org/format/2205.00672">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Cryptography and Security">cs.CR</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Distributed, Parallel, and Cluster Computing">cs.DC</span> </div> </div> <p class="title is-5 mathjax"> SightSteeple: Agreeing to Disagree with Functional Blockchain Consensus </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Ahuja%2C+A">Aditya Ahuja</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2205.00672v1-abstract-short" style="display: inline;"> Classical and contemporary distributed consensus protocols, may they be for binary agreement, state machine replication, or blockchain consensus, require all protocol participants in a peer-to-peer system to agree on exactly the same information as part of the consensus payload. Although this model of consensus is extensively studied, and is useful for most consensus based decentralized applicatio… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2205.00672v1-abstract-full').style.display = 'inline'; document.getElementById('2205.00672v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2205.00672v1-abstract-full" style="display: none;"> Classical and contemporary distributed consensus protocols, may they be for binary agreement, state machine replication, or blockchain consensus, require all protocol participants in a peer-to-peer system to agree on exactly the same information as part of the consensus payload. Although this model of consensus is extensively studied, and is useful for most consensus based decentralized applications, it falls short of defining correct distributed systems which mandate participant credential based privileged visibility into the consensus payload, through the consensus protocol itself. We introduce a new paradigm for distributed consensus, called functional blockchain consensus. Functional blockchain consensus allows each blockchain protocol participant to agree on some distinct sub-information of the list of transactions, as a function of the credentials of the participant in the blockchain system, instead of agreeing on the entire list of transactions. We motivate two adversary models, one with a standard crash-fault adversary and another with a novel rational-fault adversary, to compromise functional blockchain consensus. We then present two versions of a blockchain protocol called SightSteeple, that achieves functional blockchain consensus in the said fault models. SightSteeple relies on a novel combination of standard blockchain consensus and functional encryption, among other primitives, to achieve its goals of correctness. Finally, we discuss practical uses of functional blockchain consensus based asymmetric distributed ledgers, and motivate off-shoot constructions that can result from this new consensus paradigm. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2205.00672v1-abstract-full').style.display = 'none'; document.getElementById('2205.00672v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 2 May, 2022; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> May 2022. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">12 pages</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2203.10422">arXiv:2203.10422</a> <span> [<a href="https://arxiv.org/pdf/2203.10422">pdf</a>, <a href="https://arxiv.org/format/2203.10422">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> Subspace Modeling for Fast Out-Of-Distribution and Anomaly Detection </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Ndiour%2C+I+J">Ibrahima J. Ndiour</a>, <a href="/search/cs?searchtype=author&query=Ahuja%2C+N+A">Nilesh A. Ahuja</a>, <a href="/search/cs?searchtype=author&query=Tickoo%2C+O">Omesh Tickoo</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2203.10422v1-abstract-short" style="display: inline;"> This paper presents a fast, principled approach for detecting anomalous and out-of-distribution (OOD) samples in deep neural networks (DNN). We propose the application of linear statistical dimensionality reduction techniques on the semantic features produced by a DNN, in order to capture the low-dimensional subspace truly spanned by said features. We show that the "feature reconstruction error" (… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2203.10422v1-abstract-full').style.display = 'inline'; document.getElementById('2203.10422v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2203.10422v1-abstract-full" style="display: none;"> This paper presents a fast, principled approach for detecting anomalous and out-of-distribution (OOD) samples in deep neural networks (DNN). We propose the application of linear statistical dimensionality reduction techniques on the semantic features produced by a DNN, in order to capture the low-dimensional subspace truly spanned by said features. We show that the "feature reconstruction error" (FRE), which is the $\ell_2$-norm of the difference between the original feature in the high-dimensional space and the pre-image of its low-dimensional reduced embedding, is highly effective for OOD and anomaly detection. To generalize to intermediate features produced at any given layer, we extend the methodology by applying nonlinear kernel-based methods. Experiments using standard image datasets and DNN architectures demonstrate that our method meets or exceeds best-in-class quality performance, but at a fraction of the computational and memory cost required by the state of the art. It can be trained and run very efficiently, even on a traditional CPU. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2203.10422v1-abstract-full').style.display = 'none'; document.getElementById('2203.10422v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 19 March, 2022; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> March 2022. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">arXiv admin note: text overlap with arXiv:2012.04250</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2112.06430">arXiv:2112.06430</a> <span> [<a href="https://arxiv.org/pdf/2112.06430">pdf</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> Predicting Airbnb Rental Prices Using Multiple Feature Modalities </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Ahuja%2C+A">Aditya Ahuja</a>, <a href="/search/cs?searchtype=author&query=Lahiri%2C+A">Aditya Lahiri</a>, <a href="/search/cs?searchtype=author&query=Das%2C+A">Aniruddha Das</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2112.06430v1-abstract-short" style="display: inline;"> Figuring out the price of a listed Airbnb rental is an important and difficult task for both the host and the customer. For the former, it can enable them to set a reasonable price without compromising on their profits. For the customer, it helps understand the key drivers for price and also provides them with similarly priced places. This price prediction regression task can also have multiple do… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2112.06430v1-abstract-full').style.display = 'inline'; document.getElementById('2112.06430v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2112.06430v1-abstract-full" style="display: none;"> Figuring out the price of a listed Airbnb rental is an important and difficult task for both the host and the customer. For the former, it can enable them to set a reasonable price without compromising on their profits. For the customer, it helps understand the key drivers for price and also provides them with similarly priced places. This price prediction regression task can also have multiple downstream uses, such as in recommendation of similar rentals based on price. We propose to use geolocation, temporal, visual and natural language features to create a reliable and accurate price prediction algorithm. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2112.06430v1-abstract-full').style.display = 'none'; document.getElementById('2112.06430v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 13 December, 2021; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> December 2021. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2112.03763">arXiv:2112.03763</a> <span> [<a href="https://arxiv.org/pdf/2112.03763">pdf</a>, <a href="https://arxiv.org/format/2112.03763">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> Creating Multimodal Interactive Agents with Imitation and Self-Supervised Learning </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=DeepMind+Interactive+Agents+Team"> DeepMind Interactive Agents Team</a>, <a href="/search/cs?searchtype=author&query=Abramson%2C+J">Josh Abramson</a>, <a href="/search/cs?searchtype=author&query=Ahuja%2C+A">Arun Ahuja</a>, <a href="/search/cs?searchtype=author&query=Brussee%2C+A">Arthur Brussee</a>, <a href="/search/cs?searchtype=author&query=Carnevale%2C+F">Federico Carnevale</a>, <a href="/search/cs?searchtype=author&query=Cassin%2C+M">Mary Cassin</a>, <a href="/search/cs?searchtype=author&query=Fischer%2C+F">Felix Fischer</a>, <a href="/search/cs?searchtype=author&query=Georgiev%2C+P">Petko Georgiev</a>, <a href="/search/cs?searchtype=author&query=Goldin%2C+A">Alex Goldin</a>, <a href="/search/cs?searchtype=author&query=Gupta%2C+M">Mansi Gupta</a>, <a href="/search/cs?searchtype=author&query=Harley%2C+T">Tim Harley</a>, <a href="/search/cs?searchtype=author&query=Hill%2C+F">Felix Hill</a>, <a href="/search/cs?searchtype=author&query=Humphreys%2C+P+C">Peter C Humphreys</a>, <a href="/search/cs?searchtype=author&query=Hung%2C+A">Alden Hung</a>, <a href="/search/cs?searchtype=author&query=Landon%2C+J">Jessica Landon</a>, <a href="/search/cs?searchtype=author&query=Lillicrap%2C+T">Timothy Lillicrap</a>, <a href="/search/cs?searchtype=author&query=Merzic%2C+H">Hamza Merzic</a>, <a href="/search/cs?searchtype=author&query=Muldal%2C+A">Alistair Muldal</a>, <a href="/search/cs?searchtype=author&query=Santoro%2C+A">Adam Santoro</a>, <a href="/search/cs?searchtype=author&query=Scully%2C+G">Guy Scully</a>, <a href="/search/cs?searchtype=author&query=von+Glehn%2C+T">Tamara von Glehn</a>, <a href="/search/cs?searchtype=author&query=Wayne%2C+G">Greg Wayne</a>, <a href="/search/cs?searchtype=author&query=Wong%2C+N">Nathaniel Wong</a>, <a href="/search/cs?searchtype=author&query=Yan%2C+C">Chen Yan</a>, <a href="/search/cs?searchtype=author&query=Zhu%2C+R">Rui Zhu</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2112.03763v2-abstract-short" style="display: inline;"> A common vision from science fiction is that robots will one day inhabit our physical spaces, sense the world as we do, assist our physical labours, and communicate with us through natural language. Here we study how to design artificial agents that can interact naturally with humans using the simplification of a virtual environment. We show that imitation learning of human-human interactions in a… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2112.03763v2-abstract-full').style.display = 'inline'; document.getElementById('2112.03763v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2112.03763v2-abstract-full" style="display: none;"> A common vision from science fiction is that robots will one day inhabit our physical spaces, sense the world as we do, assist our physical labours, and communicate with us through natural language. Here we study how to design artificial agents that can interact naturally with humans using the simplification of a virtual environment. We show that imitation learning of human-human interactions in a simulated world, in conjunction with self-supervised learning, is sufficient to produce a multimodal interactive agent, which we call MIA, that successfully interacts with non-adversarial humans 75% of the time. We further identify architectural and algorithmic techniques that improve performance, such as hierarchical action selection. Altogether, our results demonstrate that imitation of multi-modal, real-time human behaviour may provide a straightforward and surprisingly effective means of imbuing agents with a rich behavioural prior from which agents might then be fine-tuned for specific purposes, thus laying a foundation for training capable agents for interactive robots or digital assistants. A video of MIA's behaviour may be found at https://youtu.be/ZFgRhviF7mY <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2112.03763v2-abstract-full').style.display = 'none'; document.getElementById('2112.03763v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 2 February, 2022; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 7 December, 2021; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> December 2021. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2112.00219">arXiv:2112.00219</a> <span> [<a href="https://arxiv.org/pdf/2112.00219">pdf</a>, <a href="https://arxiv.org/format/2112.00219">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Robotics">cs.RO</span> </div> </div> <p class="title is-5 mathjax"> Scalable Primitives for Generalized Sensor Fusion in Autonomous Vehicles </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Sidhu%2C+S">Sammy Sidhu</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+L">Linda Wang</a>, <a href="/search/cs?searchtype=author&query=Naseer%2C+T">Tayyab Naseer</a>, <a href="/search/cs?searchtype=author&query=Malhotra%2C+A">Ashish Malhotra</a>, <a href="/search/cs?searchtype=author&query=Chia%2C+J">Jay Chia</a>, <a href="/search/cs?searchtype=author&query=Ahuja%2C+A">Aayush Ahuja</a>, <a href="/search/cs?searchtype=author&query=Rasmussen%2C+E">Ella Rasmussen</a>, <a href="/search/cs?searchtype=author&query=Huang%2C+Q">Qiangui Huang</a>, <a href="/search/cs?searchtype=author&query=Gao%2C+R">Ray Gao</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2112.00219v1-abstract-short" style="display: inline;"> In autonomous driving, there has been an explosion in the use of deep neural networks for perception, prediction and planning tasks. As autonomous vehicles (AVs) move closer to production, multi-modal sensor inputs and heterogeneous vehicle fleets with different sets of sensor platforms are becoming increasingly common in the industry. However, neural network architectures typically target specifi… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2112.00219v1-abstract-full').style.display = 'inline'; document.getElementById('2112.00219v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2112.00219v1-abstract-full" style="display: none;"> In autonomous driving, there has been an explosion in the use of deep neural networks for perception, prediction and planning tasks. As autonomous vehicles (AVs) move closer to production, multi-modal sensor inputs and heterogeneous vehicle fleets with different sets of sensor platforms are becoming increasingly common in the industry. However, neural network architectures typically target specific sensor platforms and are not robust to changes in input, making the problem of scaling and model deployment particularly difficult. Furthermore, most players still treat the problem of optimizing software and hardware as entirely independent problems. We propose a new end to end architecture, Generalized Sensor Fusion (GSF), which is designed in such a way that both sensor inputs and target tasks are modular and modifiable. This enables AV system designers to easily experiment with different sensor configurations and methods and opens up the ability to deploy on heterogeneous fleets using the same models that are shared across a large engineering organization. Using this system, we report experimental results where we demonstrate near-parity of an expensive high-density (HD) LiDAR sensor with a cheap low-density (LD) LiDAR plus camera setup in the 3D object detection task. This paves the way for the industry to jointly design hardware and software architectures as well as large fleets with heterogeneous configurations. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2112.00219v1-abstract-full').style.display = 'none'; document.getElementById('2112.00219v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 30 November, 2021; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> December 2021. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Presented in Machine Learning for Autonomous Driving Workshop at the 35th Conference on Neural Information Processing Systems (NeurIPS 2021), Sydney, Australia. 11 pages, 8 figures</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2109.14332">arXiv:2109.14332</a> <span> [<a href="https://arxiv.org/pdf/2109.14332">pdf</a>, <a href="https://arxiv.org/format/2109.14332">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Human-Computer Interaction">cs.HC</span> </div> <div class="is-inline-block" style="margin-left: 0.5rem"> <div class="tags has-addons"> <span class="tag is-dark is-size-7">doi</span> <span class="tag is-light is-size-7"><a class="" href="https://doi.org/10.1145/3460418.3479326">10.1145/3460418.3479326 <i class="fa fa-external-link" aria-hidden="true"></i></a></span> </div> </div> </div> <p class="title is-5 mathjax"> PilotEar: Enabling In-ear Inertial Navigation </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Ahuja%2C+A">Ashwin Ahuja</a>, <a href="/search/cs?searchtype=author&query=Ferlini%2C+A">Andrea Ferlini</a>, <a href="/search/cs?searchtype=author&query=Mascolo%2C+C">Cecilia Mascolo</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2109.14332v1-abstract-short" style="display: inline;"> Navigation systems are used daily. While different types of navigation systems exist, inertial navigation systems (INS) have favorable properties for some wearables which, for battery and form factors may not be able to use GPS. Earables (aka ear-worn wearables) are living a momentum both as leisure devices, and sensing and computing platforms. The inherent high signal to noise ratio (SNR) of ear-… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2109.14332v1-abstract-full').style.display = 'inline'; document.getElementById('2109.14332v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2109.14332v1-abstract-full" style="display: none;"> Navigation systems are used daily. While different types of navigation systems exist, inertial navigation systems (INS) have favorable properties for some wearables which, for battery and form factors may not be able to use GPS. Earables (aka ear-worn wearables) are living a momentum both as leisure devices, and sensing and computing platforms. The inherent high signal to noise ratio (SNR) of ear-collected inertial data, due to the vibration dumping of the musculoskeletal system; combined with the fact that people typically wear a pair of earables (one per ear) could offer significant accuracy when tracking head movements, leading to potential improvements for inertial navigation. Hence, in this work, we investigate and propose PilotEar, the first end-to-end earable-based inertial navigation system, achieving an average tracking drift of 0.15 m/s for one earable and 0.11 m/s for two earables. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2109.14332v1-abstract-full').style.display = 'none'; document.getElementById('2109.14332v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 29 September, 2021; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> September 2021. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2107.10295">arXiv:2107.10295</a> <span> [<a href="https://arxiv.org/pdf/2107.10295">pdf</a>, <a href="https://arxiv.org/format/2107.10295">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Neural and Evolutionary Computing">cs.NE</span> </div> <div class="is-inline-block" style="margin-left: 0.5rem"> <div class="tags has-addons"> <span class="tag is-dark is-size-7">doi</span> <span class="tag is-light is-size-7"><a class="" href="https://doi.org/10.1038/s41598-021-04590-0">10.1038/s41598-021-04590-0 <i class="fa fa-external-link" aria-hidden="true"></i></a></span> </div> </div> </div> <p class="title is-5 mathjax"> A Review of Some Techniques for Inclusion of Domain-Knowledge into Deep Neural Networks </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Dash%2C+T">Tirtharaj Dash</a>, <a href="/search/cs?searchtype=author&query=Chitlangia%2C+S">Sharad Chitlangia</a>, <a href="/search/cs?searchtype=author&query=Ahuja%2C+A">Aditya Ahuja</a>, <a href="/search/cs?searchtype=author&query=Srinivasan%2C+A">Ashwin Srinivasan</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2107.10295v4-abstract-short" style="display: inline;"> We present a survey of ways in which existing scientific knowledge are included when constructing models with neural networks. The inclusion of domain-knowledge is of special interest not just to constructing scientific assistants, but also, many other areas that involve understanding data using human-machine collaboration. In many such instances, machine-based model construction may benefit signi… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2107.10295v4-abstract-full').style.display = 'inline'; document.getElementById('2107.10295v4-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2107.10295v4-abstract-full" style="display: none;"> We present a survey of ways in which existing scientific knowledge are included when constructing models with neural networks. The inclusion of domain-knowledge is of special interest not just to constructing scientific assistants, but also, many other areas that involve understanding data using human-machine collaboration. In many such instances, machine-based model construction may benefit significantly from being provided with human-knowledge of the domain encoded in a sufficiently precise form. This paper examines the inclusion of domain-knowledge by means of changes to: the input, the loss-function, and the architecture of deep networks. The categorisation is for ease of exposition: in practice we expect a combination of such changes will be employed. In each category, we describe techniques that have been shown to yield significant changes in the performance of deep neural networks. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2107.10295v4-abstract-full').style.display = 'none'; document.getElementById('2107.10295v4-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 21 December, 2021; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 21 July, 2021; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> July 2021. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">16 pages; Accepted at Nature Scientific Reports. arXiv admin note: substantial text overlap with arXiv:2103.00180</span> </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">MSC Class:</span> 68T07 (Primary); 68T05; 68T01 (Secondary) <span class="has-text-black-bis has-text-weight-semibold">ACM Class:</span> I.2.6; I.2.4 </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Journal ref:</span> Sci Rep 12, 1040 (2022) </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2107.03851">arXiv:2107.03851</a> <span> [<a href="https://arxiv.org/pdf/2107.03851">pdf</a>, <a href="https://arxiv.org/format/2107.03851">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> </div> </div> <p class="title is-5 mathjax"> Imitation by Predicting Observations </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Jaegle%2C+A">Andrew Jaegle</a>, <a href="/search/cs?searchtype=author&query=Sulsky%2C+Y">Yury Sulsky</a>, <a href="/search/cs?searchtype=author&query=Ahuja%2C+A">Arun Ahuja</a>, <a href="/search/cs?searchtype=author&query=Bruce%2C+J">Jake Bruce</a>, <a href="/search/cs?searchtype=author&query=Fergus%2C+R">Rob Fergus</a>, <a href="/search/cs?searchtype=author&query=Wayne%2C+G">Greg Wayne</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2107.03851v1-abstract-short" style="display: inline;"> Imitation learning enables agents to reuse and adapt the hard-won expertise of others, offering a solution to several key challenges in learning behavior. Although it is easy to observe behavior in the real-world, the underlying actions may not be accessible. We present a new method for imitation solely from observations that achieves comparable performance to experts on challenging continuous con… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2107.03851v1-abstract-full').style.display = 'inline'; document.getElementById('2107.03851v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2107.03851v1-abstract-full" style="display: none;"> Imitation learning enables agents to reuse and adapt the hard-won expertise of others, offering a solution to several key challenges in learning behavior. Although it is easy to observe behavior in the real-world, the underlying actions may not be accessible. We present a new method for imitation solely from observations that achieves comparable performance to experts on challenging continuous control tasks while also exhibiting robustness in the presence of observations unrelated to the task. Our method, which we call FORM (for "Future Observation Reward Model") is derived from an inverse RL objective and imitates using a model of expert behavior learned by generative modelling of the expert's observations, without needing ground truth actions. We show that FORM performs comparably to a strong baseline IRL method (GAIL) on the DeepMind Control Suite benchmark, while outperforming GAIL in the presence of task-irrelevant features. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2107.03851v1-abstract-full').style.display = 'none'; document.getElementById('2107.03851v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 8 July, 2021; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> July 2021. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">ICML 2021</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2104.09568">arXiv:2104.09568</a> <span> [<a href="https://arxiv.org/pdf/2104.09568">pdf</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> Detecting Vehicle Type and License Plate Number of different Vehicles on Images </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Ahuja%2C+A">Aashna Ahuja</a>, <a href="/search/cs?searchtype=author&query=Chaudhuri%2C+A">Arindam Chaudhuri</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2104.09568v1-abstract-short" style="display: inline;"> With ever increasing number of vehicles, vehicular tracking is one of the major challenges faced by urban areas. In this paper we try to develop a model that can locate a particular vehicle that the user is looking for depending on two factors 1. the Type of vehicle and the 2. License plate number of the car. The proposed system uses a unique mixture consisting of Mask R-CNN model for vehicle type… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2104.09568v1-abstract-full').style.display = 'inline'; document.getElementById('2104.09568v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2104.09568v1-abstract-full" style="display: none;"> With ever increasing number of vehicles, vehicular tracking is one of the major challenges faced by urban areas. In this paper we try to develop a model that can locate a particular vehicle that the user is looking for depending on two factors 1. the Type of vehicle and the 2. License plate number of the car. The proposed system uses a unique mixture consisting of Mask R-CNN model for vehicle type detection, WpodNet and pytesseract for License Plate detection and Prediction of letters in it. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2104.09568v1-abstract-full').style.display = 'none'; document.getElementById('2104.09568v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 12 April, 2021; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> April 2021. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Present Research Work in Progress</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2103.16216">arXiv:2103.16216</a> <span> [<a href="https://arxiv.org/pdf/2103.16216">pdf</a>, <a href="https://arxiv.org/format/2103.16216">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Science and Game Theory">cs.GT</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Cryptography and Security">cs.CR</span> </div> </div> <p class="title is-5 mathjax"> A Regulatory System for Optimal Legal Transaction Throughput in Cryptocurrency Blockchains </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Ahuja%2C+A">Aditya Ahuja</a>, <a href="/search/cs?searchtype=author&query=Ribeiro%2C+V+J">Vinay J. Ribeiro</a>, <a href="/search/cs?searchtype=author&query=Pal%2C+R">Ranjan Pal</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2103.16216v1-abstract-short" style="display: inline;"> Permissionless blockchain consensus protocols have been designed primarily for defining decentralized economies for the commercial trade of assets, both virtual and physical, using cryptocurrencies. In most instances, the assets being traded are regulated, which mandates that the legal right to their trade and their trade value are determined by the governmental regulator of the jurisdiction in wh… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2103.16216v1-abstract-full').style.display = 'inline'; document.getElementById('2103.16216v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2103.16216v1-abstract-full" style="display: none;"> Permissionless blockchain consensus protocols have been designed primarily for defining decentralized economies for the commercial trade of assets, both virtual and physical, using cryptocurrencies. In most instances, the assets being traded are regulated, which mandates that the legal right to their trade and their trade value are determined by the governmental regulator of the jurisdiction in which the trade occurs. Unfortunately, existing blockchains do not formally recognise proposal of legal cryptocurrency transactions, as part of the execution of their respective consensus protocols, resulting in rampant illegal activities in the associated crypto-economies. In this contribution, we motivate the need for regulated blockchain consensus protocols with a case study of the illegal, cryptocurrency based, Silk Road darknet market. We present a novel regulatory framework for blockchain protocols, for ensuring legal transaction confirmation as part of the blockchain distributed consensus. As per our regulatory framework, we derive conditions under which legal transaction throughput supersedes throughput of traditional transactions, which are, in the worst case, an indifferentiable mix of legal and illegal transactions. Finally, we show that with a small change to the standard blockchain consensus execution policy (appropriately introduced through regulation), the legal transaction throughput in the blockchain network can be maximized. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2103.16216v1-abstract-full').style.display = 'none'; document.getElementById('2103.16216v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 30 March, 2021; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> March 2021. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2103.00180">arXiv:2103.00180</a> <span> [<a href="https://arxiv.org/pdf/2103.00180">pdf</a>, <a href="https://arxiv.org/format/2103.00180">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Neural and Evolutionary Computing">cs.NE</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> Incorporating Domain Knowledge into Deep Neural Networks </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Dash%2C+T">Tirtharaj Dash</a>, <a href="/search/cs?searchtype=author&query=Chitlangia%2C+S">Sharad Chitlangia</a>, <a href="/search/cs?searchtype=author&query=Ahuja%2C+A">Aditya Ahuja</a>, <a href="/search/cs?searchtype=author&query=Srinivasan%2C+A">Ashwin Srinivasan</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2103.00180v2-abstract-short" style="display: inline;"> We present a survey of ways in which domain-knowledge has been included when constructing models with neural networks. The inclusion of domain-knowledge is of special interest not just to constructing scientific assistants, but also, many other areas that involve understanding data using human-machine collaboration. In many such instances, machine-based model construction may benefit significantly… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2103.00180v2-abstract-full').style.display = 'inline'; document.getElementById('2103.00180v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2103.00180v2-abstract-full" style="display: none;"> We present a survey of ways in which domain-knowledge has been included when constructing models with neural networks. The inclusion of domain-knowledge is of special interest not just to constructing scientific assistants, but also, many other areas that involve understanding data using human-machine collaboration. In many such instances, machine-based model construction may benefit significantly from being provided with human-knowledge of the domain encoded in a sufficiently precise form. This paper examines two broad approaches to encode such knowledge--as logical and numerical constraints--and describes techniques and results obtained in several sub-categories under each of these approaches. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2103.00180v2-abstract-full').style.display = 'none'; document.getElementById('2103.00180v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 15 March, 2021; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 27 February, 2021; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> March 2021. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Submitted to IJCAI-2021 Survey Track (6+2 pages)</span> </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">MSC Class:</span> 68T07 (Primary); 68T05; 68T01 (Secondary) <span class="has-text-black-bis has-text-weight-semibold">ACM Class:</span> I.2.6; I.2.4 </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2012.05672">arXiv:2012.05672</a> <span> [<a href="https://arxiv.org/pdf/2012.05672">pdf</a>, <a href="https://arxiv.org/format/2012.05672">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Multiagent Systems">cs.MA</span> </div> </div> <p class="title is-5 mathjax"> Imitating Interactive Intelligence </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Abramson%2C+J">Josh Abramson</a>, <a href="/search/cs?searchtype=author&query=Ahuja%2C+A">Arun Ahuja</a>, <a href="/search/cs?searchtype=author&query=Barr%2C+I">Iain Barr</a>, <a href="/search/cs?searchtype=author&query=Brussee%2C+A">Arthur Brussee</a>, <a href="/search/cs?searchtype=author&query=Carnevale%2C+F">Federico Carnevale</a>, <a href="/search/cs?searchtype=author&query=Cassin%2C+M">Mary Cassin</a>, <a href="/search/cs?searchtype=author&query=Chhaparia%2C+R">Rachita Chhaparia</a>, <a href="/search/cs?searchtype=author&query=Clark%2C+S">Stephen Clark</a>, <a href="/search/cs?searchtype=author&query=Damoc%2C+B">Bogdan Damoc</a>, <a href="/search/cs?searchtype=author&query=Dudzik%2C+A">Andrew Dudzik</a>, <a href="/search/cs?searchtype=author&query=Georgiev%2C+P">Petko Georgiev</a>, <a href="/search/cs?searchtype=author&query=Guy%2C+A">Aurelia Guy</a>, <a href="/search/cs?searchtype=author&query=Harley%2C+T">Tim Harley</a>, <a href="/search/cs?searchtype=author&query=Hill%2C+F">Felix Hill</a>, <a href="/search/cs?searchtype=author&query=Hung%2C+A">Alden Hung</a>, <a href="/search/cs?searchtype=author&query=Kenton%2C+Z">Zachary Kenton</a>, <a href="/search/cs?searchtype=author&query=Landon%2C+J">Jessica Landon</a>, <a href="/search/cs?searchtype=author&query=Lillicrap%2C+T">Timothy Lillicrap</a>, <a href="/search/cs?searchtype=author&query=Mathewson%2C+K">Kory Mathewson</a>, <a href="/search/cs?searchtype=author&query=Mokr%C3%A1%2C+S">So艌a Mokr谩</a>, <a href="/search/cs?searchtype=author&query=Muldal%2C+A">Alistair Muldal</a>, <a href="/search/cs?searchtype=author&query=Santoro%2C+A">Adam Santoro</a>, <a href="/search/cs?searchtype=author&query=Savinov%2C+N">Nikolay Savinov</a>, <a href="/search/cs?searchtype=author&query=Varma%2C+V">Vikrant Varma</a>, <a href="/search/cs?searchtype=author&query=Wayne%2C+G">Greg Wayne</a> , et al. (4 additional authors not shown) </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2012.05672v2-abstract-short" style="display: inline;"> A common vision from science fiction is that robots will one day inhabit our physical spaces, sense the world as we do, assist our physical labours, and communicate with us through natural language. Here we study how to design artificial agents that can interact naturally with humans using the simplification of a virtual environment. This setting nevertheless integrates a number of the central cha… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2012.05672v2-abstract-full').style.display = 'inline'; document.getElementById('2012.05672v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2012.05672v2-abstract-full" style="display: none;"> A common vision from science fiction is that robots will one day inhabit our physical spaces, sense the world as we do, assist our physical labours, and communicate with us through natural language. Here we study how to design artificial agents that can interact naturally with humans using the simplification of a virtual environment. This setting nevertheless integrates a number of the central challenges of artificial intelligence (AI) research: complex visual perception and goal-directed physical control, grounded language comprehension and production, and multi-agent social interaction. To build agents that can robustly interact with humans, we would ideally train them while they interact with humans. However, this is presently impractical. Therefore, we approximate the role of the human with another learned agent, and use ideas from inverse reinforcement learning to reduce the disparities between human-human and agent-agent interactive behaviour. Rigorously evaluating our agents poses a great challenge, so we develop a variety of behavioural tests, including evaluation by humans who watch videos of agents or interact directly with them. These evaluations convincingly demonstrate that interactive training and auxiliary losses improve agent behaviour beyond what is achieved by supervised learning of actions alone. Further, we demonstrate that agent capabilities generalise beyond literal experiences in the dataset. Finally, we train evaluation models whose ratings of agents agree well with human judgement, thus permitting the evaluation of new agent models without additional effort. Taken together, our results in this virtual environment provide evidence that large-scale human behavioural imitation is a promising tool to create intelligent, interactive agents, and the challenge of reliably evaluating such agents is possible to surmount. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2012.05672v2-abstract-full').style.display = 'none'; document.getElementById('2012.05672v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 20 January, 2021; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 10 December, 2020; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> December 2020. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2010.14274">arXiv:2010.14274</a> <span> [<a href="https://arxiv.org/pdf/2010.14274">pdf</a>, <a href="https://arxiv.org/format/2010.14274">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> Behavior Priors for Efficient Reinforcement Learning </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Tirumala%2C+D">Dhruva Tirumala</a>, <a href="/search/cs?searchtype=author&query=Galashov%2C+A">Alexandre Galashov</a>, <a href="/search/cs?searchtype=author&query=Noh%2C+H">Hyeonwoo Noh</a>, <a href="/search/cs?searchtype=author&query=Hasenclever%2C+L">Leonard Hasenclever</a>, <a href="/search/cs?searchtype=author&query=Pascanu%2C+R">Razvan Pascanu</a>, <a href="/search/cs?searchtype=author&query=Schwarz%2C+J">Jonathan Schwarz</a>, <a href="/search/cs?searchtype=author&query=Desjardins%2C+G">Guillaume Desjardins</a>, <a href="/search/cs?searchtype=author&query=Czarnecki%2C+W+M">Wojciech Marian Czarnecki</a>, <a href="/search/cs?searchtype=author&query=Ahuja%2C+A">Arun Ahuja</a>, <a href="/search/cs?searchtype=author&query=Teh%2C+Y+W">Yee Whye Teh</a>, <a href="/search/cs?searchtype=author&query=Heess%2C+N">Nicolas Heess</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2010.14274v1-abstract-short" style="display: inline;"> As we deploy reinforcement learning agents to solve increasingly challenging problems, methods that allow us to inject prior knowledge about the structure of the world and effective solution strategies becomes increasingly important. In this work we consider how information and architectural constraints can be combined with ideas from the probabilistic modeling literature to learn behavior priors… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2010.14274v1-abstract-full').style.display = 'inline'; document.getElementById('2010.14274v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2010.14274v1-abstract-full" style="display: none;"> As we deploy reinforcement learning agents to solve increasingly challenging problems, methods that allow us to inject prior knowledge about the structure of the world and effective solution strategies becomes increasingly important. In this work we consider how information and architectural constraints can be combined with ideas from the probabilistic modeling literature to learn behavior priors that capture the common movement and interaction patterns that are shared across a set of related tasks or contexts. For example the day-to day behavior of humans comprises distinctive locomotion and manipulation patterns that recur across many different situations and goals. We discuss how such behavior patterns can be captured using probabilistic trajectory models and how these can be integrated effectively into reinforcement learning schemes, e.g.\ to facilitate multi-task and transfer learning. We then extend these ideas to latent variable models and consider a formulation to learn hierarchical priors that capture different aspects of the behavior in reusable modules. We discuss how such latent variable formulations connect to related work on hierarchical reinforcement learning (HRL) and mutual information and curiosity based objectives, thereby offering an alternative perspective on existing ideas. We demonstrate the effectiveness of our framework by applying it to a range of simulated continuous control domains. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2010.14274v1-abstract-full').style.display = 'none'; document.getElementById('2010.14274v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 27 October, 2020; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2020. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Submitted to Journal of Machine Learning Research (JMLR)</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2006.01016">arXiv:2006.01016</a> <span> [<a href="https://arxiv.org/pdf/2006.01016">pdf</a>, <a href="https://arxiv.org/format/2006.01016">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> Probing Emergent Semantics in Predictive Agents via Question Answering </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Das%2C+A">Abhishek Das</a>, <a href="/search/cs?searchtype=author&query=Carnevale%2C+F">Federico Carnevale</a>, <a href="/search/cs?searchtype=author&query=Merzic%2C+H">Hamza Merzic</a>, <a href="/search/cs?searchtype=author&query=Rimell%2C+L">Laura Rimell</a>, <a href="/search/cs?searchtype=author&query=Schneider%2C+R">Rosalia Schneider</a>, <a href="/search/cs?searchtype=author&query=Abramson%2C+J">Josh Abramson</a>, <a href="/search/cs?searchtype=author&query=Hung%2C+A">Alden Hung</a>, <a href="/search/cs?searchtype=author&query=Ahuja%2C+A">Arun Ahuja</a>, <a href="/search/cs?searchtype=author&query=Clark%2C+S">Stephen Clark</a>, <a href="/search/cs?searchtype=author&query=Wayne%2C+G">Gregory Wayne</a>, <a href="/search/cs?searchtype=author&query=Hill%2C+F">Felix Hill</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2006.01016v1-abstract-short" style="display: inline;"> Recent work has shown how predictive modeling can endow agents with rich knowledge of their surroundings, improving their ability to act in complex environments. We propose question-answering as a general paradigm to decode and understand the representations that such agents develop, applying our method to two recent approaches to predictive modeling -action-conditional CPC (Guo et al., 2018) and… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2006.01016v1-abstract-full').style.display = 'inline'; document.getElementById('2006.01016v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2006.01016v1-abstract-full" style="display: none;"> Recent work has shown how predictive modeling can endow agents with rich knowledge of their surroundings, improving their ability to act in complex environments. We propose question-answering as a general paradigm to decode and understand the representations that such agents develop, applying our method to two recent approaches to predictive modeling -action-conditional CPC (Guo et al., 2018) and SimCore (Gregor et al., 2019). After training agents with these predictive objectives in a visually-rich, 3D environment with an assortment of objects, colors, shapes, and spatial configurations, we probe their internal state representations with synthetic (English) questions, without backpropagating gradients from the question-answering decoder into the agent. The performance of different agents when probed this way reveals that they learn to encode factual, and seemingly compositional, information about objects, properties and spatial relations from their physical environment. Our approach is intuitive, i.e. humans can easily interpret responses of the model as opposed to inspecting continuous vectors, and model-agnostic, i.e. applicable to any modeling approach. By revealing the implicit knowledge of objects, quantities, properties and relations acquired by agents as they learn, question-conditional agent probing can stimulate the design and development of stronger predictive learning objectives. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2006.01016v1-abstract-full').style.display = 'none'; document.getElementById('2006.01016v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 1 June, 2020; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> June 2020. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">ICML 2020</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/1911.06636">arXiv:1911.06636</a> <span> [<a href="https://arxiv.org/pdf/1911.06636">pdf</a>, <a href="https://arxiv.org/format/1911.06636">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Robotics">cs.RO</span> </div> </div> <p class="title is-5 mathjax"> Catch & Carry: Reusable Neural Controllers for Vision-Guided Whole-Body Tasks </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Merel%2C+J">Josh Merel</a>, <a href="/search/cs?searchtype=author&query=Tunyasuvunakool%2C+S">Saran Tunyasuvunakool</a>, <a href="/search/cs?searchtype=author&query=Ahuja%2C+A">Arun Ahuja</a>, <a href="/search/cs?searchtype=author&query=Tassa%2C+Y">Yuval Tassa</a>, <a href="/search/cs?searchtype=author&query=Hasenclever%2C+L">Leonard Hasenclever</a>, <a href="/search/cs?searchtype=author&query=Pham%2C+V">Vu Pham</a>, <a href="/search/cs?searchtype=author&query=Erez%2C+T">Tom Erez</a>, <a href="/search/cs?searchtype=author&query=Wayne%2C+G">Greg Wayne</a>, <a href="/search/cs?searchtype=author&query=Heess%2C+N">Nicolas Heess</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="1911.06636v2-abstract-short" style="display: inline;"> We address the longstanding challenge of producing flexible, realistic humanoid character controllers that can perform diverse whole-body tasks involving object interactions. This challenge is central to a variety of fields, from graphics and animation to robotics and motor neuroscience. Our physics-based environment uses realistic actuation and first-person perception -- including touch sensors a… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('1911.06636v2-abstract-full').style.display = 'inline'; document.getElementById('1911.06636v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="1911.06636v2-abstract-full" style="display: none;"> We address the longstanding challenge of producing flexible, realistic humanoid character controllers that can perform diverse whole-body tasks involving object interactions. This challenge is central to a variety of fields, from graphics and animation to robotics and motor neuroscience. Our physics-based environment uses realistic actuation and first-person perception -- including touch sensors and egocentric vision -- with a view to producing active-sensing behaviors (e.g. gaze direction), transferability to real robots, and comparisons to the biology. We develop an integrated neural-network based approach consisting of a motor primitive module, human demonstrations, and an instructed reinforcement learning regime with curricula and task variations. We demonstrate the utility of our approach for several tasks, including goal-conditioned box carrying and ball catching, and we characterize its behavioral robustness. The resulting controllers can be deployed in real-time on a standard PC. See overview video, https://youtu.be/2rQAW-8gQQk . <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('1911.06636v2-abstract-full').style.display = 'none'; document.getElementById('1911.06636v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 16 June, 2020; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 15 November, 2019; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> November 2019. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/1910.06988">arXiv:1910.06988</a> <span> [<a href="https://arxiv.org/pdf/1910.06988">pdf</a>, <a href="https://arxiv.org/format/1910.06988">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Robotics">cs.RO</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> Autonomous Aerial Cinematography In Unstructured Environments With Learned Artistic Decision-Making </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Bonatti%2C+R">Rogerio Bonatti</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+W">Wenshan Wang</a>, <a href="/search/cs?searchtype=author&query=Ho%2C+C">Cherie Ho</a>, <a href="/search/cs?searchtype=author&query=Ahuja%2C+A">Aayush Ahuja</a>, <a href="/search/cs?searchtype=author&query=Gschwindt%2C+M">Mirko Gschwindt</a>, <a href="/search/cs?searchtype=author&query=Camci%2C+E">Efe Camci</a>, <a href="/search/cs?searchtype=author&query=Kayacan%2C+E">Erdal Kayacan</a>, <a href="/search/cs?searchtype=author&query=Choudhury%2C+S">Sanjiban Choudhury</a>, <a href="/search/cs?searchtype=author&query=Scherer%2C+S">Sebastian Scherer</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="1910.06988v1-abstract-short" style="display: inline;"> Aerial cinematography is revolutionizing industries that require live and dynamic camera viewpoints such as entertainment, sports, and security. However, safely piloting a drone while filming a moving target in the presence of obstacles is immensely taxing, often requiring multiple expert human operators. Hence, there is demand for an autonomous cinematographer that can reason about both geometry… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('1910.06988v1-abstract-full').style.display = 'inline'; document.getElementById('1910.06988v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="1910.06988v1-abstract-full" style="display: none;"> Aerial cinematography is revolutionizing industries that require live and dynamic camera viewpoints such as entertainment, sports, and security. However, safely piloting a drone while filming a moving target in the presence of obstacles is immensely taxing, often requiring multiple expert human operators. Hence, there is demand for an autonomous cinematographer that can reason about both geometry and scene context in real-time. Existing approaches do not address all aspects of this problem; they either require high-precision motion-capture systems or GPS tags to localize targets, rely on prior maps of the environment, plan for short time horizons, or only follow artistic guidelines specified before flight. In this work, we address the problem in its entirety and propose a complete system for real-time aerial cinematography that for the first time combines: (1) vision-based target estimation; (2) 3D signed-distance mapping for occlusion estimation; (3) efficient trajectory optimization for long time-horizon camera motion; and (4) learning-based artistic shot selection. We extensively evaluate our system both in simulation and in field experiments by filming dynamic targets moving through unstructured environments. Our results indicate that our system can operate reliably in the real world without restrictive assumptions. We also provide in-depth analysis and discussions for each module, with the hope that our design tradeoffs can generalize to other related applications. Videos of the complete system can be found at: https://youtu.be/ookhHnqmlaU. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('1910.06988v1-abstract-full').style.display = 'none'; document.getElementById('1910.06988v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 15 October, 2019; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2019. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/1909.12238">arXiv:1909.12238</a> <span> [<a href="https://arxiv.org/pdf/1909.12238">pdf</a>, <a href="https://arxiv.org/format/1909.12238">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> V-MPO: On-Policy Maximum a Posteriori Policy Optimization for Discrete and Continuous Control </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Song%2C+H+F">H. Francis Song</a>, <a href="/search/cs?searchtype=author&query=Abdolmaleki%2C+A">Abbas Abdolmaleki</a>, <a href="/search/cs?searchtype=author&query=Springenberg%2C+J+T">Jost Tobias Springenberg</a>, <a href="/search/cs?searchtype=author&query=Clark%2C+A">Aidan Clark</a>, <a href="/search/cs?searchtype=author&query=Soyer%2C+H">Hubert Soyer</a>, <a href="/search/cs?searchtype=author&query=Rae%2C+J+W">Jack W. Rae</a>, <a href="/search/cs?searchtype=author&query=Noury%2C+S">Seb Noury</a>, <a href="/search/cs?searchtype=author&query=Ahuja%2C+A">Arun Ahuja</a>, <a href="/search/cs?searchtype=author&query=Liu%2C+S">Siqi Liu</a>, <a href="/search/cs?searchtype=author&query=Tirumala%2C+D">Dhruva Tirumala</a>, <a href="/search/cs?searchtype=author&query=Heess%2C+N">Nicolas Heess</a>, <a href="/search/cs?searchtype=author&query=Belov%2C+D">Dan Belov</a>, <a href="/search/cs?searchtype=author&query=Riedmiller%2C+M">Martin Riedmiller</a>, <a href="/search/cs?searchtype=author&query=Botvinick%2C+M+M">Matthew M. Botvinick</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="1909.12238v1-abstract-short" style="display: inline;"> Some of the most successful applications of deep reinforcement learning to challenging domains in discrete and continuous control have used policy gradient methods in the on-policy setting. However, policy gradients can suffer from large variance that may limit performance, and in practice require carefully tuned entropy regularization to prevent policy collapse. As an alternative to policy gradie… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('1909.12238v1-abstract-full').style.display = 'inline'; document.getElementById('1909.12238v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="1909.12238v1-abstract-full" style="display: none;"> Some of the most successful applications of deep reinforcement learning to challenging domains in discrete and continuous control have used policy gradient methods in the on-policy setting. However, policy gradients can suffer from large variance that may limit performance, and in practice require carefully tuned entropy regularization to prevent policy collapse. As an alternative to policy gradient algorithms, we introduce V-MPO, an on-policy adaptation of Maximum a Posteriori Policy Optimization (MPO) that performs policy iteration based on a learned state-value function. We show that V-MPO surpasses previously reported scores for both the Atari-57 and DMLab-30 benchmark suites in the multi-task setting, and does so reliably without importance weighting, entropy regularization, or population-based tuning of hyperparameters. On individual DMLab and Atari levels, the proposed algorithm can achieve scores that are substantially higher than has previously been reported. V-MPO is also applicable to problems with high-dimensional, continuous action spaces, which we demonstrate in the context of learning to control simulated humanoids with 22 degrees of freedom from full state observations and 56 degrees of freedom from pixel observations, as well as example OpenAI Gym tasks where V-MPO achieves substantially higher asymptotic scores than previously reported. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('1909.12238v1-abstract-full').style.display = 'none'; document.getElementById('1909.12238v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 26 September, 2019; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> September 2019. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">* equal contribution</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/1909.11786">arXiv:1909.11786</a> <span> [<a href="https://arxiv.org/pdf/1909.11786">pdf</a>, <a href="https://arxiv.org/format/1909.11786">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">stat.ML</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> Probabilistic Modeling of Deep Features for Out-of-Distribution and Adversarial Detection </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Ahuja%2C+N+A">Nilesh A. Ahuja</a>, <a href="/search/cs?searchtype=author&query=Ndiour%2C+I">Ibrahima Ndiour</a>, <a href="/search/cs?searchtype=author&query=Kalyanpur%2C+T">Trushant Kalyanpur</a>, <a href="/search/cs?searchtype=author&query=Tickoo%2C+O">Omesh Tickoo</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="1909.11786v1-abstract-short" style="display: inline;"> We present a principled approach for detecting out-of-distribution (OOD) and adversarial samples in deep neural networks. Our approach consists in modeling the outputs of the various layers (deep features) with parametric probability distributions once training is completed. At inference, the likelihoods of the deep features w.r.t the previously learnt distributions are calculated and used to deri… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('1909.11786v1-abstract-full').style.display = 'inline'; document.getElementById('1909.11786v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="1909.11786v1-abstract-full" style="display: none;"> We present a principled approach for detecting out-of-distribution (OOD) and adversarial samples in deep neural networks. Our approach consists in modeling the outputs of the various layers (deep features) with parametric probability distributions once training is completed. At inference, the likelihoods of the deep features w.r.t the previously learnt distributions are calculated and used to derive uncertainty estimates that can discriminate in-distribution samples from OOD samples. We explore the use of two classes of multivariate distributions for modeling the deep features - Gaussian and Gaussian mixture - and study the trade-off between accuracy and computational complexity. We demonstrate benefits of our approach on image features by detecting OOD images and adversarially-generated images, using popular DNN architectures on MNIST and CIFAR10 datasets. We show that more precise modeling of the feature distributions result in significantly improved detection of OOD and adversarial samples; up to 12 percentage points in AUPR and AUROC metrics. We further show that our approach remains extremely effective when applied to video data and associated spatio-temporal features by detecting adversarial samples on activity classification tasks using UCF101 dataset, and the C3D network. To our knowledge, our methodology is the first one reported for reliably detecting white-box adversarial framing, a state-of-the-art adversarial attack for video classifiers. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('1909.11786v1-abstract-full').style.display = 'none'; document.getElementById('1909.11786v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 25 September, 2019; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> September 2019. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/1903.11174">arXiv:1903.11174</a> <span> [<a href="https://arxiv.org/pdf/1903.11174">pdf</a>, <a href="https://arxiv.org/format/1903.11174">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Robotics">cs.RO</span> </div> </div> <p class="title is-5 mathjax"> Improved Generalization of Heading Direction Estimation for Aerial Filming Using Semi-supervised Regression </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Wang%2C+W">Wenshan Wang</a>, <a href="/search/cs?searchtype=author&query=Ahuja%2C+A">Aayush Ahuja</a>, <a href="/search/cs?searchtype=author&query=Zhang%2C+Y">Yanfu Zhang</a>, <a href="/search/cs?searchtype=author&query=Bonatti%2C+R">Rogerio Bonatti</a>, <a href="/search/cs?searchtype=author&query=Scherer%2C+S">Sebastian Scherer</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="1903.11174v1-abstract-short" style="display: inline;"> In the task of Autonomous aerial filming of a moving actor (e.g. a person or a vehicle), it is crucial to have a good heading direction estimation for the actor from the visual input. However, the models obtained in other similar tasks, such as pedestrian collision risk analysis and human-robot interaction, are very difficult to generalize to the aerial filming task, because of the difference in d… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('1903.11174v1-abstract-full').style.display = 'inline'; document.getElementById('1903.11174v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="1903.11174v1-abstract-full" style="display: none;"> In the task of Autonomous aerial filming of a moving actor (e.g. a person or a vehicle), it is crucial to have a good heading direction estimation for the actor from the visual input. However, the models obtained in other similar tasks, such as pedestrian collision risk analysis and human-robot interaction, are very difficult to generalize to the aerial filming task, because of the difference in data distributions. Towards improving generalization with less amount of labeled data, this paper presents a semi-supervised algorithm for heading direction estimation problem. We utilize temporal continuity as the unsupervised signal to regularize the model and achieve better generalization ability. This semi-supervised algorithm is applied to both training and testing phases, which increases the testing performance by a large margin. We show that by leveraging unlabeled sequences, the amount of labeled data required can be significantly reduced. We also discuss several important details on improving the performance by balancing labeled and unlabeled loss, and making good combinations. Experimental results show that our approach robustly outputs the heading direction for different types of actor. The aesthetic value of the video is also improved in the aerial filming task. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('1903.11174v1-abstract-full').style.display = 'none'; document.getElementById('1903.11174v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 26 March, 2019; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> March 2019. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/1903.07438">arXiv:1903.07438</a> <span> [<a href="https://arxiv.org/pdf/1903.07438">pdf</a>, <a href="https://arxiv.org/format/1903.07438">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">stat.ML</span> </div> </div> <p class="title is-5 mathjax"> Exploiting Hierarchy for Learning and Transfer in KL-regularized RL </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Tirumala%2C+D">Dhruva Tirumala</a>, <a href="/search/cs?searchtype=author&query=Noh%2C+H">Hyeonwoo Noh</a>, <a href="/search/cs?searchtype=author&query=Galashov%2C+A">Alexandre Galashov</a>, <a href="/search/cs?searchtype=author&query=Hasenclever%2C+L">Leonard Hasenclever</a>, <a href="/search/cs?searchtype=author&query=Ahuja%2C+A">Arun Ahuja</a>, <a href="/search/cs?searchtype=author&query=Wayne%2C+G">Greg Wayne</a>, <a href="/search/cs?searchtype=author&query=Pascanu%2C+R">Razvan Pascanu</a>, <a href="/search/cs?searchtype=author&query=Teh%2C+Y+W">Yee Whye Teh</a>, <a href="/search/cs?searchtype=author&query=Heess%2C+N">Nicolas Heess</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="1903.07438v2-abstract-short" style="display: inline;"> As reinforcement learning agents are tasked with solving more challenging and diverse tasks, the ability to incorporate prior knowledge into the learning system and to exploit reusable structure in solution space is likely to become increasingly important. The KL-regularized expected reward objective constitutes one possible tool to this end. It introduces an additional component, a default or pri… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('1903.07438v2-abstract-full').style.display = 'inline'; document.getElementById('1903.07438v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="1903.07438v2-abstract-full" style="display: none;"> As reinforcement learning agents are tasked with solving more challenging and diverse tasks, the ability to incorporate prior knowledge into the learning system and to exploit reusable structure in solution space is likely to become increasingly important. The KL-regularized expected reward objective constitutes one possible tool to this end. It introduces an additional component, a default or prior behavior, which can be learned alongside the policy and as such partially transforms the reinforcement learning problem into one of behavior modelling. In this work we consider the implications of this framework in cases where both the policy and default behavior are augmented with latent variables. We discuss how the resulting hierarchical structures can be used to implement different inductive biases and how their modularity can benefit transfer. Empirically we find that they can lead to faster learning and transfer on a range of continuous control tasks. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('1903.07438v2-abstract-full').style.display = 'none'; document.getElementById('1903.07438v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 23 January, 2020; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 18 March, 2019; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> March 2019. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/1811.11711">arXiv:1811.11711</a> <span> [<a href="https://arxiv.org/pdf/1811.11711">pdf</a>, <a href="https://arxiv.org/format/1811.11711">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Robotics">cs.RO</span> </div> </div> <p class="title is-5 mathjax"> Neural probabilistic motor primitives for humanoid control </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Merel%2C+J">Josh Merel</a>, <a href="/search/cs?searchtype=author&query=Hasenclever%2C+L">Leonard Hasenclever</a>, <a href="/search/cs?searchtype=author&query=Galashov%2C+A">Alexandre Galashov</a>, <a href="/search/cs?searchtype=author&query=Ahuja%2C+A">Arun Ahuja</a>, <a href="/search/cs?searchtype=author&query=Pham%2C+V">Vu Pham</a>, <a href="/search/cs?searchtype=author&query=Wayne%2C+G">Greg Wayne</a>, <a href="/search/cs?searchtype=author&query=Teh%2C+Y+W">Yee Whye Teh</a>, <a href="/search/cs?searchtype=author&query=Heess%2C+N">Nicolas Heess</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="1811.11711v2-abstract-short" style="display: inline;"> We focus on the problem of learning a single motor module that can flexibly express a range of behaviors for the control of high-dimensional physically simulated humanoids. To do this, we propose a motor architecture that has the general structure of an inverse model with a latent-variable bottleneck. We show that it is possible to train this model entirely offline to compress thousands of expert… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('1811.11711v2-abstract-full').style.display = 'inline'; document.getElementById('1811.11711v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="1811.11711v2-abstract-full" style="display: none;"> We focus on the problem of learning a single motor module that can flexibly express a range of behaviors for the control of high-dimensional physically simulated humanoids. To do this, we propose a motor architecture that has the general structure of an inverse model with a latent-variable bottleneck. We show that it is possible to train this model entirely offline to compress thousands of expert policies and learn a motor primitive embedding space. The trained neural probabilistic motor primitive system can perform one-shot imitation of whole-body humanoid behaviors, robustly mimicking unseen trajectories. Additionally, we demonstrate that it is also straightforward to train controllers to reuse the learned motor primitive space to solve tasks, and the resulting movements are relatively naturalistic. To support the training of our model, we compare two approaches for offline policy cloning, including an experience efficient method which we call linear feedback policy cloning. We encourage readers to view a supplementary video ( https://youtu.be/CaDEf-QcKwA ) summarizing our results. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('1811.11711v2-abstract-full').style.display = 'none'; document.getElementById('1811.11711v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 15 January, 2019; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 28 November, 2018; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> November 2018. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Accepted as a conference paper at ICLR 2019</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/1811.11682">arXiv:1811.11682</a> <span> [<a href="https://arxiv.org/pdf/1811.11682">pdf</a>, <a href="https://arxiv.org/format/1811.11682">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">stat.ML</span> </div> </div> <p class="title is-5 mathjax"> Experience Replay for Continual Learning </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Rolnick%2C+D">David Rolnick</a>, <a href="/search/cs?searchtype=author&query=Ahuja%2C+A">Arun Ahuja</a>, <a href="/search/cs?searchtype=author&query=Schwarz%2C+J">Jonathan Schwarz</a>, <a href="/search/cs?searchtype=author&query=Lillicrap%2C+T+P">Timothy P. Lillicrap</a>, <a href="/search/cs?searchtype=author&query=Wayne%2C+G">Greg Wayne</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="1811.11682v2-abstract-short" style="display: inline;"> Continual learning is the problem of learning new tasks or knowledge while protecting old knowledge and ideally generalizing from old experience to learn new tasks faster. Neural networks trained by stochastic gradient descent often degrade on old tasks when trained successively on new tasks with different data distributions. This phenomenon, referred to as catastrophic forgetting, is considered a… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('1811.11682v2-abstract-full').style.display = 'inline'; document.getElementById('1811.11682v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="1811.11682v2-abstract-full" style="display: none;"> Continual learning is the problem of learning new tasks or knowledge while protecting old knowledge and ideally generalizing from old experience to learn new tasks faster. Neural networks trained by stochastic gradient descent often degrade on old tasks when trained successively on new tasks with different data distributions. This phenomenon, referred to as catastrophic forgetting, is considered a major hurdle to learning with non-stationary data or sequences of new tasks, and prevents networks from continually accumulating knowledge and skills. We examine this issue in the context of reinforcement learning, in a setting where an agent is exposed to tasks in a sequence. Unlike most other work, we do not provide an explicit indication to the model of task boundaries, which is the most general circumstance for a learning agent exposed to continuous experience. While various methods to counteract catastrophic forgetting have recently been proposed, we explore a straightforward, general, and seemingly overlooked solution - that of using experience replay buffers for all past events - with a mixture of on- and off-policy learning, leveraging behavioral cloning. We show that this strategy can still learn new tasks quickly yet can substantially reduce catastrophic forgetting in both Atari and DMLab domains, even matching the performance of methods that require task identities. When buffer storage is constrained, we confirm that a simple mechanism for randomly discarding data allows a limited size buffer to perform almost as well as an unbounded one. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('1811.11682v2-abstract-full').style.display = 'none'; document.getElementById('1811.11682v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 26 November, 2019; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 28 November, 2018; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> November 2018. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">NeurIPS 2019</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/1811.09656">arXiv:1811.09656</a> <span> [<a href="https://arxiv.org/pdf/1811.09656">pdf</a>, <a href="https://arxiv.org/format/1811.09656">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Robotics">cs.RO</span> </div> </div> <p class="title is-5 mathjax"> Hierarchical visuomotor control of humanoids </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Merel%2C+J">Josh Merel</a>, <a href="/search/cs?searchtype=author&query=Ahuja%2C+A">Arun Ahuja</a>, <a href="/search/cs?searchtype=author&query=Pham%2C+V">Vu Pham</a>, <a href="/search/cs?searchtype=author&query=Tunyasuvunakool%2C+S">Saran Tunyasuvunakool</a>, <a href="/search/cs?searchtype=author&query=Liu%2C+S">Siqi Liu</a>, <a href="/search/cs?searchtype=author&query=Tirumala%2C+D">Dhruva Tirumala</a>, <a href="/search/cs?searchtype=author&query=Heess%2C+N">Nicolas Heess</a>, <a href="/search/cs?searchtype=author&query=Wayne%2C+G">Greg Wayne</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="1811.09656v2-abstract-short" style="display: inline;"> We aim to build complex humanoid agents that integrate perception, motor control, and memory. In this work, we partly factor this problem into low-level motor control from proprioception and high-level coordination of the low-level skills informed by vision. We develop an architecture capable of surprisingly flexible, task-directed motor control of a relatively high-DoF humanoid body by combining… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('1811.09656v2-abstract-full').style.display = 'inline'; document.getElementById('1811.09656v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="1811.09656v2-abstract-full" style="display: none;"> We aim to build complex humanoid agents that integrate perception, motor control, and memory. In this work, we partly factor this problem into low-level motor control from proprioception and high-level coordination of the low-level skills informed by vision. We develop an architecture capable of surprisingly flexible, task-directed motor control of a relatively high-DoF humanoid body by combining pre-training of low-level motor controllers with a high-level, task-focused controller that switches among low-level sub-policies. The resulting system is able to control a physically-simulated humanoid body to solve tasks that require coupling visual perception from an unstabilized egocentric RGB camera during locomotion in the environment. For a supplementary video link, see https://youtu.be/7GISvfbykLE . <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('1811.09656v2-abstract-full').style.display = 'none'; document.getElementById('1811.09656v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 15 January, 2019; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 23 November, 2018; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> November 2018. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Accepted as a conference paper at ICLR 2019</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/1810.06721">arXiv:1810.06721</a> <span> [<a href="https://arxiv.org/pdf/1810.06721">pdf</a>, <a href="https://arxiv.org/format/1810.06721">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> Optimizing Agent Behavior over Long Time Scales by Transporting Value </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Hung%2C+C">Chia-Chun Hung</a>, <a href="/search/cs?searchtype=author&query=Lillicrap%2C+T">Timothy Lillicrap</a>, <a href="/search/cs?searchtype=author&query=Abramson%2C+J">Josh Abramson</a>, <a href="/search/cs?searchtype=author&query=Wu%2C+Y">Yan Wu</a>, <a href="/search/cs?searchtype=author&query=Mirza%2C+M">Mehdi Mirza</a>, <a href="/search/cs?searchtype=author&query=Carnevale%2C+F">Federico Carnevale</a>, <a href="/search/cs?searchtype=author&query=Ahuja%2C+A">Arun Ahuja</a>, <a href="/search/cs?searchtype=author&query=Wayne%2C+G">Greg Wayne</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="1810.06721v2-abstract-short" style="display: inline;"> Humans spend a remarkable fraction of waking life engaged in acts of "mental time travel". We dwell on our actions in the past and experience satisfaction or regret. More than merely autobiographical storytelling, we use these event recollections to change how we will act in similar scenarios in the future. This process endows us with a computationally important ability to link actions and consequ… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('1810.06721v2-abstract-full').style.display = 'inline'; document.getElementById('1810.06721v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="1810.06721v2-abstract-full" style="display: none;"> Humans spend a remarkable fraction of waking life engaged in acts of "mental time travel". We dwell on our actions in the past and experience satisfaction or regret. More than merely autobiographical storytelling, we use these event recollections to change how we will act in similar scenarios in the future. This process endows us with a computationally important ability to link actions and consequences across long spans of time, which figures prominently in addressing the problem of long-term temporal credit assignment; in artificial intelligence (AI) this is the question of how to evaluate the utility of the actions within a long-duration behavioral sequence leading to success or failure in a task. Existing approaches to shorter-term credit assignment in AI cannot solve tasks with long delays between actions and consequences. Here, we introduce a new paradigm for reinforcement learning where agents use recall of specific memories to credit actions from the past, allowing them to solve problems that are intractable for existing algorithms. This paradigm broadens the scope of problems that can be investigated in AI and offers a mechanistic account of behaviors that may inspire computational models in neuroscience, psychology, and behavioral economics. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('1810.06721v2-abstract-full').style.display = 'none'; document.getElementById('1810.06721v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 21 December, 2018; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 15 October, 2018; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2018. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/1806.00593">arXiv:1806.00593</a> <span> [<a href="https://arxiv.org/pdf/1806.00593">pdf</a>, <a href="https://arxiv.org/format/1806.00593">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> BoxNet: Deep Learning Based Biomedical Image Segmentation Using Boxes Only Annotation </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Yang%2C+L">Lin Yang</a>, <a href="/search/cs?searchtype=author&query=Zhang%2C+Y">Yizhe Zhang</a>, <a href="/search/cs?searchtype=author&query=Zhao%2C+Z">Zhuo Zhao</a>, <a href="/search/cs?searchtype=author&query=Zheng%2C+H">Hao Zheng</a>, <a href="/search/cs?searchtype=author&query=Liang%2C+P">Peixian Liang</a>, <a href="/search/cs?searchtype=author&query=Ying%2C+M+T+C">Michael T. C. Ying</a>, <a href="/search/cs?searchtype=author&query=Ahuja%2C+A+T">Anil T. Ahuja</a>, <a href="/search/cs?searchtype=author&query=Chen%2C+D+Z">Danny Z. Chen</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="1806.00593v1-abstract-short" style="display: inline;"> In recent years, deep learning (DL) methods have become powerful tools for biomedical image segmentation. However, high annotation efforts and costs are commonly needed to acquire sufficient biomedical training data for DL models. To alleviate the burden of manual annotation, in this paper, we propose a new weakly supervised DL approach for biomedical image segmentation using boxes only annotation… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('1806.00593v1-abstract-full').style.display = 'inline'; document.getElementById('1806.00593v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="1806.00593v1-abstract-full" style="display: none;"> In recent years, deep learning (DL) methods have become powerful tools for biomedical image segmentation. However, high annotation efforts and costs are commonly needed to acquire sufficient biomedical training data for DL models. To alleviate the burden of manual annotation, in this paper, we propose a new weakly supervised DL approach for biomedical image segmentation using boxes only annotation. First, we develop a method to combine graph search (GS) and DL to generate fine object masks from box annotation, in which DL uses box annotation to compute a rough segmentation for GS and then GS is applied to locate the optimal object boundaries. During the mask generation process, we carefully utilize information from box annotation to filter out potential errors, and then use the generated masks to train an accurate DL segmentation network. Extensive experiments on gland segmentation in histology images, lymph node segmentation in ultrasound images, and fungus segmentation in electron microscopy images show that our approach attains superior performance over the best known state-of-the-art weakly supervised DL method and is able to achieve (1) nearly the same accuracy compared to fully supervised DL methods with far less annotation effort, (2) significantly better results with similar annotation time, and (3) robust performance in various applications. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('1806.00593v1-abstract-full').style.display = 'none'; document.getElementById('1806.00593v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 2 June, 2018; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> June 2018. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/1805.09738">arXiv:1805.09738</a> <span> [<a href="https://arxiv.org/pdf/1805.09738">pdf</a>, <a href="https://arxiv.org/format/1805.09738">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Cryptography and Security">cs.CR</span> </div> </div> <p class="title is-5 mathjax"> Detecting Homoglyph Attacks with a Siamese Neural Network </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Woodbridge%2C+J">Jonathan Woodbridge</a>, <a href="/search/cs?searchtype=author&query=Anderson%2C+H+S">Hyrum S. Anderson</a>, <a href="/search/cs?searchtype=author&query=Ahuja%2C+A">Anjum Ahuja</a>, <a href="/search/cs?searchtype=author&query=Grant%2C+D">Daniel Grant</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="1805.09738v1-abstract-short" style="display: inline;"> A homoglyph (name spoofing) attack is a common technique used by adversaries to obfuscate file and domain names. This technique creates process or domain names that are visually similar to legitimate and recognized names. For instance, an attacker may create malware with the name svch0st.exe so that in a visual inspection of running processes or a directory listing, the process or file name might… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('1805.09738v1-abstract-full').style.display = 'inline'; document.getElementById('1805.09738v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="1805.09738v1-abstract-full" style="display: none;"> A homoglyph (name spoofing) attack is a common technique used by adversaries to obfuscate file and domain names. This technique creates process or domain names that are visually similar to legitimate and recognized names. For instance, an attacker may create malware with the name svch0st.exe so that in a visual inspection of running processes or a directory listing, the process or file name might be mistaken as the Windows system process svchost.exe. There has been limited published research on detecting homoglyph attacks. Current approaches rely on string comparison algorithms (such as Levenshtein distance) that result in computationally heavy solutions with a high number of false positives. In addition, there is a deficiency in the number of publicly available datasets for reproducible research, with most datasets focused on phishing attacks, in which homoglyphs are not always used. This paper presents a fundamentally different solution to this problem using a Siamese convolutional neural network (CNN). Rather than leveraging similarity based on character swaps and deletions, this technique uses a learned metric on strings rendered as images: a CNN learns features that are optimized to detect visual similarity of the rendered strings. The trained model is used to convert thousands of potentially targeted process or domain names to feature vectors. These feature vectors are indexed using randomized KD-Trees to make similarity searches extremely fast with minimal computational processing. This technique shows a considerable 13% to 45% improvement over baseline techniques in terms of area under the receiver operating characteristic curve (ROC AUC). In addition, we provide both code and data to further future research. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('1805.09738v1-abstract-full').style.display = 'none'; document.getElementById('1805.09738v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 24 May, 2018; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> May 2018. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/1804.01128">arXiv:1804.01128</a> <span> [<a href="https://arxiv.org/pdf/1804.01128">pdf</a>, <a href="https://arxiv.org/format/1804.01128">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> </div> </div> <p class="title is-5 mathjax"> Probing Physics Knowledge Using Tools from Developmental Psychology </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Piloto%2C+L">Luis Piloto</a>, <a href="/search/cs?searchtype=author&query=Weinstein%2C+A">Ari Weinstein</a>, <a href="/search/cs?searchtype=author&query=TB%2C+D">Dhruva TB</a>, <a href="/search/cs?searchtype=author&query=Ahuja%2C+A">Arun Ahuja</a>, <a href="/search/cs?searchtype=author&query=Mirza%2C+M">Mehdi Mirza</a>, <a href="/search/cs?searchtype=author&query=Wayne%2C+G">Greg Wayne</a>, <a href="/search/cs?searchtype=author&query=Amos%2C+D">David Amos</a>, <a href="/search/cs?searchtype=author&query=Hung%2C+C">Chia-chun Hung</a>, <a href="/search/cs?searchtype=author&query=Botvinick%2C+M">Matt Botvinick</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="1804.01128v1-abstract-short" style="display: inline;"> In order to build agents with a rich understanding of their environment, one key objective is to endow them with a grasp of intuitive physics; an ability to reason about three-dimensional objects, their dynamic interactions, and responses to forces. While some work on this problem has taken the approach of building in components such as ready-made physics engines, other research aims to extract ge… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('1804.01128v1-abstract-full').style.display = 'inline'; document.getElementById('1804.01128v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="1804.01128v1-abstract-full" style="display: none;"> In order to build agents with a rich understanding of their environment, one key objective is to endow them with a grasp of intuitive physics; an ability to reason about three-dimensional objects, their dynamic interactions, and responses to forces. While some work on this problem has taken the approach of building in components such as ready-made physics engines, other research aims to extract general physical concepts directly from sensory data. In the latter case, one challenge that arises is evaluating the learning system. Research on intuitive physics knowledge in children has long employed a violation of expectations (VOE) method to assess children's mastery of specific physical concepts. We take the novel step of applying this method to artificial learning systems. In addition to introducing the VOE technique, we describe a set of probe datasets inspired by classic test stimuli from developmental psychology. We test a baseline deep learning system on this battery, as well as on a physics learning dataset ("IntPhys") recently posed by another research group. Our results show how the VOE technique may provide a useful tool for tracking physics knowledge in future research. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('1804.01128v1-abstract-full').style.display = 'none'; document.getElementById('1804.01128v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 3 April, 2018; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> April 2018. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/1803.10760">arXiv:1803.10760</a> <span> [<a href="https://arxiv.org/pdf/1803.10760">pdf</a>, <a href="https://arxiv.org/format/1803.10760">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">stat.ML</span> </div> </div> <p class="title is-5 mathjax"> Unsupervised Predictive Memory in a Goal-Directed Agent </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Wayne%2C+G">Greg Wayne</a>, <a href="/search/cs?searchtype=author&query=Hung%2C+C">Chia-Chun Hung</a>, <a href="/search/cs?searchtype=author&query=Amos%2C+D">David Amos</a>, <a href="/search/cs?searchtype=author&query=Mirza%2C+M">Mehdi Mirza</a>, <a href="/search/cs?searchtype=author&query=Ahuja%2C+A">Arun Ahuja</a>, <a href="/search/cs?searchtype=author&query=Grabska-Barwinska%2C+A">Agnieszka Grabska-Barwinska</a>, <a href="/search/cs?searchtype=author&query=Rae%2C+J">Jack Rae</a>, <a href="/search/cs?searchtype=author&query=Mirowski%2C+P">Piotr Mirowski</a>, <a href="/search/cs?searchtype=author&query=Leibo%2C+J+Z">Joel Z. Leibo</a>, <a href="/search/cs?searchtype=author&query=Santoro%2C+A">Adam Santoro</a>, <a href="/search/cs?searchtype=author&query=Gemici%2C+M">Mevlana Gemici</a>, <a href="/search/cs?searchtype=author&query=Reynolds%2C+M">Malcolm Reynolds</a>, <a href="/search/cs?searchtype=author&query=Harley%2C+T">Tim Harley</a>, <a href="/search/cs?searchtype=author&query=Abramson%2C+J">Josh Abramson</a>, <a href="/search/cs?searchtype=author&query=Mohamed%2C+S">Shakir Mohamed</a>, <a href="/search/cs?searchtype=author&query=Rezende%2C+D">Danilo Rezende</a>, <a href="/search/cs?searchtype=author&query=Saxton%2C+D">David Saxton</a>, <a href="/search/cs?searchtype=author&query=Cain%2C+A">Adam Cain</a>, <a href="/search/cs?searchtype=author&query=Hillier%2C+C">Chloe Hillier</a>, <a href="/search/cs?searchtype=author&query=Silver%2C+D">David Silver</a>, <a href="/search/cs?searchtype=author&query=Kavukcuoglu%2C+K">Koray Kavukcuoglu</a>, <a href="/search/cs?searchtype=author&query=Botvinick%2C+M">Matt Botvinick</a>, <a href="/search/cs?searchtype=author&query=Hassabis%2C+D">Demis Hassabis</a>, <a href="/search/cs?searchtype=author&query=Lillicrap%2C+T">Timothy Lillicrap</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="1803.10760v1-abstract-short" style="display: inline;"> Animals execute goal-directed behaviours despite the limited range and scope of their sensors. To cope, they explore environments and store memories maintaining estimates of important information that is not presently available. Recently, progress has been made with artificial intelligence (AI) agents that learn to perform tasks from sensory input, even at a human level, by merging reinforcement l… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('1803.10760v1-abstract-full').style.display = 'inline'; document.getElementById('1803.10760v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="1803.10760v1-abstract-full" style="display: none;"> Animals execute goal-directed behaviours despite the limited range and scope of their sensors. To cope, they explore environments and store memories maintaining estimates of important information that is not presently available. Recently, progress has been made with artificial intelligence (AI) agents that learn to perform tasks from sensory input, even at a human level, by merging reinforcement learning (RL) algorithms with deep neural networks, and the excitement surrounding these results has led to the pursuit of related ideas as explanations of non-human animal learning. However, we demonstrate that contemporary RL algorithms struggle to solve simple tasks when enough information is concealed from the sensors of the agent, a property called "partial observability". An obvious requirement for handling partially observed tasks is access to extensive memory, but we show memory is not enough; it is critical that the right information be stored in the right format. We develop a model, the Memory, RL, and Inference Network (MERLIN), in which memory formation is guided by a process of predictive modeling. MERLIN facilitates the solution of tasks in 3D virtual reality environments for which partial observability is severe and memories must be maintained over long durations. Our model demonstrates a single learning agent architecture that can solve canonical behavioural tasks in psychology and neurobiology without strong simplifying assumptions about the dimensionality of sensory input or the duration of experiences. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('1803.10760v1-abstract-full').style.display = 'none'; document.getElementById('1803.10760v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 28 March, 2018; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> March 2018. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/1712.09259">arXiv:1712.09259</a> <span> [<a href="https://arxiv.org/pdf/1712.09259">pdf</a>, <a href="https://arxiv.org/format/1712.09259">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Science and Game Theory">cs.GT</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Cryptography and Security">cs.CR</span> </div> </div> <p class="title is-5 mathjax"> Intention Games: Towards Strategic Coexistence between Partially Honest and Blind Players </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Ahuja%2C+A">Aditya Ahuja</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="1712.09259v2-abstract-short" style="display: inline;"> Strategic interactions between competitive entities are generally considered from the perspective of complete revelation of benefits achieved from those interactions, in the form of public payoff functions and/or beliefs, in the announced games. However, there exist strategic interplays between competitors where the players have a choice to strategise under the availability of private payoffs, in… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('1712.09259v2-abstract-full').style.display = 'inline'; document.getElementById('1712.09259v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="1712.09259v2-abstract-full" style="display: none;"> Strategic interactions between competitive entities are generally considered from the perspective of complete revelation of benefits achieved from those interactions, in the form of public payoff functions and/or beliefs, in the announced games. However, there exist strategic interplays between competitors where the players have a choice to strategise under the availability of private payoffs, in similar competitive settings. In this contribution, we propose a formal framework for a competitive ecosystem where each player is permitted to defect from publicly optimal strategies under certain private payoffs greater than announced payoffs, given that these defections have certain acceptable bounds in the long run as agreed by all players. We call this game theoretic construction an Intention Game. We formally define an Intention Game, and notions of participational equilibria that exist in such interactions that permit public defections. We compare Intention Games with conventional strategic form games, and demonstrate a type-theoretic construction of Intention Games. In a partially honest setting, we give Intention Game instances of a Cournot competition, secure interactions between mobile applications, an Internet services' data sourcing competition between Internet service providers through content delivery networks, and a Bitcoin mining competition. We give a use of Intention Games to determine player participation in a cryptographic protocol. Finally, we demonstrate the possibility of a dual model of the Intention Games framework. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('1712.09259v2-abstract-full').style.display = 'none'; document.getElementById('1712.09259v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 11 February, 2020; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 26 December, 2017; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> December 2017. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">19 pages, 2 figures; major revision to the game with new examples</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/1703.00207">arXiv:1703.00207</a> <span> [<a href="https://arxiv.org/pdf/1703.00207">pdf</a>, <a href="https://arxiv.org/ps/1703.00207">ps</a>, <a href="https://arxiv.org/format/1703.00207">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Cryptography and Security">cs.CR</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Quantum Physics">quant-ph</span> </div> </div> <p class="title is-5 mathjax"> A Quantum-Classical Scheme towards Quantum Functional Encryption </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Ahuja%2C+A">Aditya Ahuja</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="1703.00207v1-abstract-short" style="display: inline;"> Quantum encryption is a well studied problem for both classical and quantum information. However, little is known about quantum encryption schemes which enable the user, under different keys, to learn different functions of the plaintext, given the ciphertext. In this paper, we give a novel one-bit secret-key quantum encryption scheme, a classical extension of which allows different key holders to… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('1703.00207v1-abstract-full').style.display = 'inline'; document.getElementById('1703.00207v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="1703.00207v1-abstract-full" style="display: none;"> Quantum encryption is a well studied problem for both classical and quantum information. However, little is known about quantum encryption schemes which enable the user, under different keys, to learn different functions of the plaintext, given the ciphertext. In this paper, we give a novel one-bit secret-key quantum encryption scheme, a classical extension of which allows different key holders to learn different length subsequences of the plaintext from the ciphertext. We prove our quantum-classical scheme secure under the notions of quantum semantic security, quantum entropic indistinguishability, and recent security definitions from the field of functional encryption. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('1703.00207v1-abstract-full').style.display = 'none'; document.getElementById('1703.00207v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 1 March, 2017; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> March 2017. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">13 pages</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/1611.00791">arXiv:1611.00791</a> <span> [<a href="https://arxiv.org/pdf/1611.00791">pdf</a>, <a href="https://arxiv.org/format/1611.00791">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Cryptography and Security">cs.CR</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> </div> </div> <p class="title is-5 mathjax"> Predicting Domain Generation Algorithms with Long Short-Term Memory Networks </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Woodbridge%2C+J">Jonathan Woodbridge</a>, <a href="/search/cs?searchtype=author&query=Anderson%2C+H+S">Hyrum S. Anderson</a>, <a href="/search/cs?searchtype=author&query=Ahuja%2C+A">Anjum Ahuja</a>, <a href="/search/cs?searchtype=author&query=Grant%2C+D">Daniel Grant</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="1611.00791v1-abstract-short" style="display: inline;"> Various families of malware use domain generation algorithms (DGAs) to generate a large number of pseudo-random domain names to connect to a command and control (C&C) server. In order to block DGA C&C traffic, security organizations must first discover the algorithm by reverse engineering malware samples, then generating a list of domains for a given seed. The domains are then either preregistered… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('1611.00791v1-abstract-full').style.display = 'inline'; document.getElementById('1611.00791v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="1611.00791v1-abstract-full" style="display: none;"> Various families of malware use domain generation algorithms (DGAs) to generate a large number of pseudo-random domain names to connect to a command and control (C&C) server. In order to block DGA C&C traffic, security organizations must first discover the algorithm by reverse engineering malware samples, then generating a list of domains for a given seed. The domains are then either preregistered or published in a DNS blacklist. This process is not only tedious, but can be readily circumvented by malware authors using a large number of seeds in algorithms with multivariate recurrence properties (e.g., banjori) or by using a dynamic list of seeds (e.g., bedep). Another technique to stop malware from using DGAs is to intercept DNS queries on a network and predict whether domains are DGA generated. Such a technique will alert network administrators to the presence of malware on their networks. In addition, if the predictor can also accurately predict the family of DGAs, then network administrators can also be alerted to the type of malware that is on their networks. This paper presents a DGA classifier that leverages long short-term memory (LSTM) networks to predict DGAs and their respective families without the need for a priori feature extraction. Results are significantly better than state-of-the-art techniques, providing 0.9993 area under the receiver operating characteristic curve for binary classification and a micro-averaged F1 score of 0.9906. In other terms, the LSTM technique can provide a 90% detection rate with a 1:10000 false positive (FP) rate---a twenty times FP improvement over comparable methods. Experiments in this paper are run on open datasets and code snippets are provided to reproduce the results. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('1611.00791v1-abstract-full').style.display = 'none'; document.getElementById('1611.00791v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 2 November, 2016; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> November 2016. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/1203.3920">arXiv:1203.3920</a> <span> [<a href="https://arxiv.org/pdf/1203.3920">pdf</a>, <a href="https://arxiv.org/format/1203.3920">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Networking and Internet Architecture">cs.NI</span> </div> </div> <p class="title is-5 mathjax"> Stochastic Characteristics and Simulation of the Random Waypoint Mobility Model </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Ahuja%2C+A">A. Ahuja</a>, <a href="/search/cs?searchtype=author&query=Venkateswarlu%2C+K">K. Venkateswarlu</a>, <a href="/search/cs?searchtype=author&query=Krishna%2C+P+V">P. Venkata Krishna</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="1203.3920v1-abstract-short" style="display: inline;"> Simulation results for Mobile Ad-Hoc Networks (MANETs) are fundamentally governed by the underlying Mobility Model. Thus it is imperative to find whether events functionally dependent on the mobility model 'converge' to well defined functions or constants. This shall ensure the long-run consistency among simulation performed by disparate parties. This paper reviews a work on the discrete Random Wa… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('1203.3920v1-abstract-full').style.display = 'inline'; document.getElementById('1203.3920v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="1203.3920v1-abstract-full" style="display: none;"> Simulation results for Mobile Ad-Hoc Networks (MANETs) are fundamentally governed by the underlying Mobility Model. Thus it is imperative to find whether events functionally dependent on the mobility model 'converge' to well defined functions or constants. This shall ensure the long-run consistency among simulation performed by disparate parties. This paper reviews a work on the discrete Random Waypoint Mobility Model (RWMM), addressing its long run stochastic stability. It is proved that each model in the targeted discrete class of the RWMM satisfies Birkhoff's pointwise ergodic theorem [13], and hence time averaged functions on the mobility model surely converge. We also simulate the most common and general version of the RWMM to give insight into its working. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('1203.3920v1-abstract-full').style.display = 'none'; document.getElementById('1203.3920v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 18 March, 2012; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> March 2012. </p> </li> </ol> <div class="is-hidden-tablet">  <span class="help" style="display: inline-block;"><a href="https://github.com/arXiv/arxiv-search/releases">Search v0.5.6 released 2020-02-24</a>  </span> </div> </div> </main> <footer> <div class="columns is-desktop" role="navigation" aria-label="Secondary">  <div class="column"> <div class="columns"> <div class="column"> <ul class="nav-spaced"> <li><a href="https://info.arxiv.org/about">About</a></li> <li><a href="https://info.arxiv.org/help">Help</a></li> </ul> </div> <div class="column"> <ul class="nav-spaced"> <li> <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 512 512" class="icon filter-black" role="presentation"><title>contact arXiv</title><desc>Click here to contact arXiv</desc><path d="M502.3 190.8c3.9-3.1 9.7-.2 9.7 4.7V400c0 26.5-21.5 48-48 48H48c-26.5 0-48-21.5-48-48V195.6c0-5 5.7-7.8 9.7-4.7 22.4 17.4 52.1 39.5 154.1 113.6 21.1 15.4 56.7 47.8 92.2 47.6 35.7.3 72-32.8 92.3-47.6 102-74.1 131.6-96.3 154-113.7zM256 320c23.2.4 56.6-29.2 73.4-41.4 132.7-96.3 142.8-104.7 173.4-128.7 5.8-4.5 9.2-11.5 9.2-18.9v-19c0-26.5-21.5-48-48-48H48C21.5 64 0 85.5 0 112v19c0 7.4 3.4 14.3 9.2 18.9 30.6 23.9 40.7 32.4 173.4 128.7 16.8 12.2 50.2 41.8 73.4 41.4z"/></svg> <a href="https://info.arxiv.org/help/contact.html"> Contact</a> </li> <li> <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 512 512" class="icon filter-black" role="presentation"><title>subscribe to arXiv mailings</title><desc>Click here to subscribe</desc><path d="M476 3.2L12.5 270.6c-18.1 10.4-15.8 35.6 2.2 43.2L121 358.4l287.3-253.2c5.5-4.9 13.3 2.6 8.6 8.3L176 407v80.5c0 23.6 28.5 32.9 42.5 15.8L282 426l124.6 52.2c14.2 6 30.4-2.9 33-18.2l72-432C515 7.8 493.3-6.8 476 3.2z"/></svg> <a href="https://info.arxiv.org/help/subscribe"> Subscribe</a> </li> </ul> </div> </div> </div>   <div class="column"> <div class="columns"> <div class="column"> <ul class="nav-spaced"> <li><a href="https://info.arxiv.org/help/license/index.html">Copyright</a></li> <li><a href="https://info.arxiv.org/help/policies/privacy_policy.html">Privacy Policy</a></li> </ul> </div> <div class="column sorry-app-links"> <ul class="nav-spaced"> <li><a href="https://info.arxiv.org/help/web_accessibility.html">Web Accessibility Assistance</a></li> <li> <p class="help"> <a class="a11y-main-link" href="https://status.arxiv.org" target="_blank">arXiv Operational Status <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 256 512" class="icon filter-dark_grey" role="presentation"><path d="M224.3 273l-136 136c-9.4 9.4-24.6 9.4-33.9 0l-22.6-22.6c-9.4-9.4-9.4-24.6 0-33.9l96.4-96.4-96.4-96.4c-9.4-9.4-9.4-24.6 0-33.9L54.3 103c9.4-9.4 24.6-9.4 33.9 0l136 136c9.5 9.4 9.5 24.6.1 34z"/></svg></a><br> Get status notifications via <a class="is-link" href="https://subscribe.sorryapp.com/24846f03/email/new" target="_blank"><svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 512 512" class="icon filter-black" role="presentation"><path d="M502.3 190.8c3.9-3.1 9.7-.2 9.7 4.7V400c0 26.5-21.5 48-48 48H48c-26.5 0-48-21.5-48-48V195.6c0-5 5.7-7.8 9.7-4.7 22.4 17.4 52.1 39.5 154.1 113.6 21.1 15.4 56.7 47.8 92.2 47.6 35.7.3 72-32.8 92.3-47.6 102-74.1 131.6-96.3 154-113.7zM256 320c23.2.4 56.6-29.2 73.4-41.4 132.7-96.3 142.8-104.7 173.4-128.7 5.8-4.5 9.2-11.5 9.2-18.9v-19c0-26.5-21.5-48-48-48H48C21.5 64 0 85.5 0 112v19c0 7.4 3.4 14.3 9.2 18.9 30.6 23.9 40.7 32.4 173.4 128.7 16.8 12.2 50.2 41.8 73.4 41.4z"/></svg>email</a> or <a class="is-link" href="https://subscribe.sorryapp.com/24846f03/slack/new" target="_blank"><svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 448 512" class="icon filter-black" role="presentation"><path d="M94.12 315.1c0 25.9-21.16 47.06-47.06 47.06S0 341 0 315.1c0-25.9 21.16-47.06 47.06-47.06h47.06v47.06zm23.72 0c0-25.9 21.16-47.06 47.06-47.06s47.06 21.16 47.06 47.06v117.84c0 25.9-21.16 47.06-47.06 47.06s-47.06-21.16-47.06-47.06V315.1zm47.06-188.98c-25.9 0-47.06-21.16-47.06-47.06S139 32 164.9 32s47.06 21.16 47.06 47.06v47.06H164.9zm0 23.72c25.9 0 47.06 21.16 47.06 47.06s-21.16 47.06-47.06 47.06H47.06C21.16 243.96 0 222.8 0 196.9s21.16-47.06 47.06-47.06H164.9zm188.98 47.06c0-25.9 21.16-47.06 47.06-47.06 25.9 0 47.06 21.16 47.06 47.06s-21.16 47.06-47.06 47.06h-47.06V196.9zm-23.72 0c0 25.9-21.16 47.06-47.06 47.06-25.9 0-47.06-21.16-47.06-47.06V79.06c0-25.9 21.16-47.06 47.06-47.06 25.9 0 47.06 21.16 47.06 47.06V196.9zM283.1 385.88c25.9 0 47.06 21.16 47.06 47.06 0 25.9-21.16 47.06-47.06 47.06-25.9 0-47.06-21.16-47.06-47.06v-47.06h47.06zm0-23.72c-25.9 0-47.06-21.16-47.06-47.06 0-25.9 21.16-47.06 47.06-47.06h117.84c25.9 0 47.06 21.16 47.06 47.06 0 25.9-21.16 47.06-47.06 47.06H283.1z"/></svg>slack</a> </p> </li> </ul> </div> </div> </div>  </div> </footer> <script src="https://static.arxiv.org/static/base/1.0.0a5/js/member_acknowledgement.js"></script> </body> </html>

CINXE.COM

Search | arXiv e-print repository