Search | arXiv e-print repository
Showing 1–38 of 38 results for author: Rosman, G

Searching in archive cs (query: Rosman, G; field: author). Sorted by announcement date (newest first), 50 results per page. Search v0.5.6, released 2020-02-24.
[1] arXiv:2411.16554 [pdf, other] (cs.LG, cs.CV)

Generating Out-Of-Distribution Scenarios Using Language Models

Authors: Erfan Aasi, Phat Nguyen, Shiva Sreeram, Guy Rosman, Sertac Karaman, Daniela Rus

Abstract: The deployment of autonomous vehicles controlled by machine learning techniques requires extensive testing in diverse real-world environments, robust handling of edge cases and out-of-distribution scenarios, and comprehensive safety validation to ensure that these systems can navigate safely and effectively under unpredictable conditions. Addressing Out-Of-Distribution (OOD) driving scenarios is essential for enhancing safety, as OOD scenarios help validate the reliability of the models within the vehicle's autonomy stack. However, generating OOD scenarios is challenging due to their long-tailed distribution and rarity in urban driving datasets. Recently, Large Language Models (LLMs) have shown promise in autonomous driving, particularly for their zero-shot generalization and common-sense reasoning capabilities. In this paper, we leverage these LLM strengths to introduce a framework for generating diverse OOD driving scenarios. Our approach uses LLMs to construct a branching tree, where each branch represents a unique OOD scenario. These scenarios are then simulated in the CARLA simulator using an automated framework that aligns scene augmentation with the corresponding textual descriptions. We evaluate our framework through extensive simulations and assess its performance via a diversity metric that measures the richness of the scenarios. Additionally, we introduce a new "OOD-ness" metric, which quantifies how much the generated scenarios deviate from typical urban driving conditions. Furthermore, we explore the capacity of modern Vision-Language Models (VLMs) to interpret and safely navigate through the simulated OOD scenarios. Our findings offer valuable insights into the reliability of language models in addressing OOD scenarios within the context of urban driving.

Submitted 25 November, 2024; originally announced November 2024.
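The branching-tree construction described in the abstract above is simple to prototype. Below is a minimal sketch of the idea, not the paper's code: `llm` is a hypothetical callable returning `n` text completions, and the prompt wording and field names are illustrative only.

```python
def build_ood_tree(llm, scene, depth=2, branching=3):
    """Recursively ask an LLM for out-of-distribution variants of a driving
    scene description; each branch of the resulting tree is one OOD scenario.
    `llm` is a hypothetical callable: llm(prompt, n) -> list of n strings."""
    node = {"scene": scene, "children": []}
    if depth == 0:
        return node
    prompt = (f"Propose {branching} distinct, unusual (out-of-distribution) "
              f"variations of this driving scene: {scene}")
    for variant in llm(prompt, n=branching):
        node["children"].append(build_ood_tree(llm, variant, depth - 1, branching))
    return node
```

Each leaf description would then be handed to a scene-augmentation pipeline (CARLA, in the paper) for simulation.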
[2] arXiv:2410.14177 [pdf, other] (cs.RO, cs.CV)

Learning autonomous driving from aerial imagery

Authors: Varun Murali, Guy Rosman, Sertac Karaman, Daniela Rus

Abstract: In this work, we consider the problem of learning end-to-end perception-to-control for ground vehicles solely from aerial imagery. Photogrammetric simulators allow the synthesis of novel views through the transformation of pre-generated assets. However, they have a large setup cost, require careful collection of data, and often need human effort to create usable simulators. We use a Neural Radiance Field (NeRF) as an intermediate representation to synthesize novel views from the point of view of a ground vehicle. These novel viewpoints can then be used for several downstream autonomous navigation applications. In this work, we demonstrate the utility of novel view synthesis through the application of training a policy for end-to-end learning from images and depth data. In a traditional real-to-sim-to-real framework, the collected data would be transformed into a visual simulator, which could then be used to generate novel views. In contrast, using a NeRF allows a compact representation and the ability to optimize over the parameters of the visual simulator as more data is gathered in the environment. We demonstrate the efficacy of our method in a custom-built mini-city environment through the deployment of imitation policies on robotic cars. We additionally consider the task of place localization and demonstrate that our method is able to relocalize the car in the real world.

Submitted 18 October, 2024; originally announced October 2024.
Comments: Presented at IROS 2024.

[3] arXiv:2410.10062 [pdf, other] (cs.RO, cs.AI, cs.HC)

Dreaming to Assist: Learning to Align with Human Objectives for Shared Control in High-Speed Racing

Authors: Jonathan DeCastro, Andrew Silva, Deepak Gopinath, Emily Sumner, Thomas M. Balch, Laporsha Dees, Guy Rosman

Abstract: Tight coordination is required for effective human-robot teams in domains involving fast dynamics and tactical decisions, such as multi-car racing. In such settings, robot teammates must react to cues of a human teammate's tactical objective to assist in a way that is consistent with the objective (e.g., navigating left or right around an obstacle). To address this challenge, we present Dream2Assist, a framework that combines a rich world model able to infer human objectives and value functions with an assistive agent that provides appropriate expert assistance to a given human teammate. Our approach builds on a recurrent state-space model to explicitly infer human intents, enabling the assistive agent to select actions that align with the human and enabling fluid teaming interaction. We demonstrate our approach in a high-speed racing domain with a population of synthetic human drivers pursuing mutually exclusive objectives, such as "stay-behind" and "overtake". We show that the combined human-robot team, when blending its actions with those of the human, outperforms the synthetic humans alone as well as several baseline assistance strategies, and that intent-conditioning enables adherence to human preferences during task execution, leading to improved performance while satisfying the human's objective.

Submitted 13 October, 2024; originally announced October 2024.
Comments: Accepted to CoRL 2024, Munich, Germany.

[4] arXiv:2410.01608 [pdf, other] (cs.RO)

Computational Teaching for Driving via Multi-Task Imitation Learning

Authors: Deepak Gopinath, Xiongyi Cui, Jonathan DeCastro, Emily Sumner, Jean Costa, Hiroshi Yasuda, Allison Morgan, Laporsha Dees, Sheryl Chau, John Leonard, Tiffany Chen, Guy Rosman, Avinash Balachandran

Abstract: Learning motor skills for sports or performance driving is often done with professional instruction from expert human teachers, whose availability is limited. Our goal is to enable automated teaching via a learned model that interacts with the student similarly to a human teacher. However, training such automated teaching systems is limited by the availability of high-quality annotated datasets of expert teacher and student interactions, which are difficult to collect at scale. To address this data-scarcity problem, we propose an approach for training a coaching system for complex motor tasks, such as high-performance driving, via a Multi-Task Imitation Learning (MTIL) paradigm. MTIL allows our model to learn robust representations by utilizing self-supervised training signals from more readily available non-interactive datasets of humans performing the task of interest. We validate our approach with (1) a semi-synthetic dataset created from real human driving trajectories, (2) a professional track-driving instruction dataset, (3) a track-racing driving-simulator human-subject study, and (4) a system demonstration on an instrumented car at a race track. Our experiments show that the right set of auxiliary machine learning tasks improves performance in predicting teaching instructions. Moreover, in the human-subject study, students exposed to the instructions from our teaching system improve their ability to stay within track limits and show favorable perception of the model's interaction with them in terms of usefulness and satisfaction.

Submitted 2 October, 2024; originally announced October 2024.
Comments: 12 pages, 3 figures, 3 tables.

[5] arXiv:2409.14950 [pdf, other] (cs.RO)

Online Adaptation of Learned Vehicle Dynamics Model with Meta-Learning Approach

Authors: Yuki Tsuchiya, Thomas Balch, Paul Drews, Guy Rosman

Abstract: We represent a vehicle dynamics model for autonomous driving near the limits of handling via a multi-layer neural network. Online adaptation is desirable in order to address unseen environments. However, the model needs to adapt to new environments without forgetting previously encountered ones. In this study, we apply Continual-MAML to overcome this difficulty. It enables the model to adapt to previously encountered environments quickly and efficiently by starting updates from optimized initial parameters. We evaluate the impact of online model adaptation with respect to inference performance and its impact on the control performance of a model predictive path integral (MPPI) controller using the TRIKart platform. The neural network was pre-trained using driving data collected in our test environment, and experiments for online adaptation were executed on multiple different road conditions not contained in the training data. Empirical results show that the model using Continual-MAML outperforms the fixed model and the model using gradient descent in test-set loss and online tracking performance of MPPI.

Submitted 23 September, 2024; originally announced September 2024.
Comments: 8 pages, 6 figures, IROS 2024.
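The core mechanism in entry [5], fast adaptation starting from a meta-learned initialization, fits in a few lines. The fragment below is an illustrative PyTorch sketch, not the paper's implementation: it omits Continual-MAML's task-shift detection and the meta-training outer loop, and `meta_model` and the batch layout are assumptions.

```python
import copy
import torch
import torch.nn.functional as F

def adapt_to_new_condition(meta_model, recent_batch, inner_lr=1e-2, steps=5):
    """Clone the meta-learned initialization and take a few gradient steps on
    recently observed driving data, e.g. (state-action history, next state)."""
    model = copy.deepcopy(meta_model)   # keep the meta-init intact for reuse
    opt = torch.optim.SGD(model.parameters(), lr=inner_lr)
    x, y = recent_batch
    for _ in range(steps):
        opt.zero_grad()
        loss = F.mse_loss(model(x), y)  # dynamics prediction error
        loss.backward()
        opt.step()
    return model  # adapted dynamics model, e.g. for an MPPI controller
```

Because updates always start from the optimized initial parameters, adapting to a new road surface does not overwrite what was learned for previous ones.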
[6] arXiv:2409.04738 [pdf, other] (cs.RO, cs.HC)

Modeling Drivers' Risk Perception via Attention to Improve Driving Assistance

Authors: Abhijat Biswas, John Gideon, Kimimasa Tamura, Guy Rosman

Abstract: Advanced Driver Assistance Systems (ADAS) alert drivers during safety-critical scenarios but often provide superfluous alerts due to a lack of consideration for drivers' knowledge or scene awareness. Modeling these aspects together in a data-driven way is challenging due to the scarcity of critical-scenario data with in-cabin driver state and world state recorded together. We explore the benefits of driver modeling in the context of Forward Collision Warning (FCW) systems. Working with a real-world video dataset of on-road FCW deployments, we collect observers' subjective validity ratings of the deployed alerts. We also annotate participants' gaze-to-objects and extract 3D trajectories of the ego vehicle and other vehicles semi-automatically. We generate a risk estimate of the scene and the drivers' perception in a two-step process: first, we model the movement of vehicles in a given scenario as a joint trajectory forecasting problem; then, we reason about the drivers' risk perception of the scene by counterfactually modifying the input to the forecasting model to represent the drivers' actual observations of vehicles in the scene. The difference between these behaviors gives us an estimate of driver behavior that accounts for their actual (inattentive) observations and their downstream effect on overall scene risk. We compare both a learned scene representation and a more traditional "worst-case" deceleration model for the future trajectory forecast. Our experiments show that using this risk formulation to generate FCW alerts may lead to an improved false-positive rate and improved FCW timing.

Submitted 7 September, 2024; originally announced September 2024.
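The two-step, counterfactual risk estimate in entry [6] amounts to running the same forecaster twice: once on the full scene, and once on only what the driver actually looked at. A minimal sketch under assumed interfaces (`forecaster`, `risk_of`, and the track layout are hypothetical, not the paper's API):

```python
def perceived_risk_gap(forecaster, risk_of, scene_tracks, attended_ids):
    """Compare scene risk from a joint trajectory forecast over all vehicles
    with the risk implied by the driver's (possibly inattentive) observations."""
    true_risk = risk_of(forecaster(scene_tracks))          # full world state
    seen_tracks = {i: t for i, t in scene_tracks.items()
                   if i in attended_ids}                   # gazed-at vehicles only
    perceived_risk = risk_of(forecaster(seen_tracks))      # driver's view
    return true_risk - perceived_risk  # large gap -> driver likely unaware -> alert
```

An FCW policy could then fire only when this gap crosses a threshold, rather than on scene risk alone, which is the mechanism behind the reduced false positives the abstract reports.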
[7] arXiv:2406.09810 [pdf, other] (cs.RO, eess.SY)

Think Deep and Fast: Learning Neural Nonlinear Opinion Dynamics from Inverse Dynamic Games for Split-Second Interactions

Authors: Haimin Hu, Jonathan DeCastro, Deepak Gopinath, Guy Rosman, Naomi Ehrich Leonard, Jaime Fernández Fisac

Abstract: Non-cooperative interactions commonly occur in multi-agent scenarios such as car racing, where an ego vehicle can choose to overtake the rival, or stay behind it until a safe overtaking "corridor" opens. While an expert human can do well at making such time-sensitive decisions, the development of safe and efficient game-theoretic trajectory planners capable of rapidly reasoning about discrete options is yet to be fully addressed. The recently developed nonlinear opinion dynamics (NOD) show promise in enabling fast opinion formation and avoiding safety-critical deadlocks. However, it remains an open challenge to determine the model parameters of NOD automatically and adaptively, accounting for the ever-changing environment of interaction. In this work, we propose for the first time a learning-based, game-theoretic approach to synthesize a Neural NOD model from expert demonstrations, given as a dataset containing (possibly incomplete) state and action trajectories of interacting agents. The learned NOD can be used by existing dynamic game solvers to plan decisively while accounting for the predicted change of other agents' intents, thus enabling situational awareness in planning. We demonstrate Neural NOD's ability to make fast and robust decisions in a simulated autonomous racing example, leading to tangible improvements in safety and overtaking performance over state-of-the-art data-driven game-theoretic planning methods.

Submitted 14 June, 2024; originally announced June 2024.
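For context on entry [7]: nonlinear opinion dynamics from the prior NOD literature (Leonard et al.) take, in their simplest single-agent, single-option form, a damped saturating-feedback shape. An illustrative scalar reduction (our gloss, not the paper's model) is

$$\dot z = -d\,z + u\tanh(\alpha z + b),$$

where $z$ is the opinion state (e.g., preference for overtaking versus staying behind), $d$ a damping rate, $u$ an attention gain, and $b$ an input bias. For sufficiently large $u$ the neutral state $z=0$ destabilizes and $z$ commits rapidly to one option, which is the fast, deadlock-avoiding opinion formation the abstract refers to; the paper's contribution is learning such parameters from demonstrations rather than hand-tuning them.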
[8] arXiv:2405.05956 [pdf, other] (cs.RO, cs.CV)

Probing Multimodal LLMs as World Models for Driving

Authors: Shiva Sreeram, Tsun-Hsuan Wang, Alaa Maalouf, Guy Rosman, Sertac Karaman, Daniela Rus

Abstract: We provide a sober look at the application of Multimodal Large Language Models (MLLMs) in autonomous driving, challenging common assumptions about their ability to interpret dynamic driving scenarios. Despite advances in models like GPT-4o, their performance in complex driving environments remains largely unexplored. Our experimental study assesses various MLLMs as world models using in-car camera perspectives and reveals that while these models excel at interpreting individual images, they struggle to synthesize coherent narratives across frames, leading to considerable inaccuracies in understanding (i) ego vehicle dynamics, (ii) interactions with other road actors, (iii) trajectory planning, and (iv) open-set scene reasoning. We introduce the Eval-LLM-Drive dataset and DriveSim simulator to enhance our evaluation, highlighting gaps in current MLLM capabilities and the need for improved models in dynamic real-world environments.

Submitted 25 October, 2024; v1 submitted 9 May, 2024; originally announced May 2024.
Comments: https://github.com/sreeramsa/DriveSim https://www.youtube.com/watch?v=Fs8jgngOJzU

[9] arXiv:2402.14174 [pdf, other] (cs.RO, cs.AI, eess.SY, math.OC)

Blending Data-Driven Priors in Dynamic Games

Authors: Justin Lidard, Haimin Hu, Asher Hancock, Zixu Zhang, Albert Gimó Contreras, Vikash Modi, Jonathan DeCastro, Deepak Gopinath, Guy Rosman, Naomi Ehrich Leonard, María Santos, Jaime Fernández Fisac

Abstract: As intelligent robots like autonomous vehicles become increasingly deployed in the presence of people, the extent to which these systems should leverage model-based game-theoretic planners versus data-driven policies for safe, interaction-aware motion planning remains an open question. Existing dynamic game formulations assume all agents are task-driven and behave optimally. However, in reality, humans tend to deviate from the decisions prescribed by these models, and their behavior is better approximated under a noisy-rational paradigm. In this work, we investigate a principled methodology to blend a data-driven reference policy with an optimization-based game-theoretic policy. We formulate KLGame, an algorithm for solving non-cooperative dynamic games with Kullback-Leibler (KL) regularization with respect to a general, stochastic, and possibly multi-modal reference policy. Our method incorporates, for each decision maker, a tunable parameter that permits modulation between task-driven and data-driven behaviors. We propose an efficient algorithm for computing multi-modal approximate feedback Nash equilibrium strategies of KLGame in real time. Through a series of simulated and real-world autonomous driving scenarios, we demonstrate that KLGame policies can more effectively incorporate guidance from the reference policy and account for noisily-rational human behaviors versus non-regularized baselines. Website with additional information, videos, and code: https://kl-games.github.io/.

Submitted 6 July, 2024; v1 submitted 21 February, 2024; originally announced February 2024.
Comments: 20 pages, 12 figures.
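The single-step, single-agent version of the KL-regularized blending in entry [9] has a well-known closed form: maximizing E_pi[Q] - lam * KL(pi || pi_ref) over a discrete action set gives pi(a) proportional to pi_ref(a) * exp(Q(a)/lam). The sketch below shows only this basic mechanism, not KLGame's feedback Nash equilibrium solver; variable names are ours.

```python
import numpy as np

def kl_blended_policy(q_values, ref_policy, lam):
    """pi(a) ∝ ref(a) * exp(Q(a) / lam): the closed-form maximizer of
    E_pi[Q] - lam * KL(pi || ref) over a discrete action set.
    lam -> 0 recovers the greedy task-driven action;
    lam -> infinity recovers the data-driven reference policy."""
    logits = np.log(ref_policy) + q_values / lam
    logits -= logits.max()            # stabilize the exponential
    weights = np.exp(logits)
    return weights / weights.sum()
```

The tunable `lam` plays the role of the per-agent parameter the abstract mentions for modulating between task-driven and data-driven behavior.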
Varying levels of these cognitive factors could influence the effectiveness and acceptance of driver safety interfaces. We demonstrate an approach for personalizing driver interaction via driver safety interfaces that are triggered based on a learned recurrent neural network. The network is trained from a population of human drivers to infer impulsivity and inhibitory control from recent driving behavior. Using a high-fidelity vehicle motion simulator, we demonstrate the ability to deduce these factors from driver behavior. We then use these inferred factors to make instantaneous determinations on whether or not to engage a driver safety interface. This interface aims to decrease a driver's speed during yellow lights and reduce their inclination to run through them. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2402.05893v1-abstract-full').style.display = 'none'; document.getElementById('2402.05893v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 8 February, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">12 pages, 7 figures</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2402.01974">arXiv:2402.01974</a> <span> [<a href="https://arxiv.org/pdf/2402.01974">pdf</a>, <a href="https://arxiv.org/format/2402.01974">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> Hypergraph-Transformer (HGT) for Interactive Event Prediction in Laparoscopic and Robotic Surgery </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Yin%2C+L">Lianhao Yin</a>, <a href="/search/cs?searchtype=author&query=Ban%2C+Y">Yutong Ban</a>, <a href="/search/cs?searchtype=author&query=Eckhoff%2C+J">Jennifer Eckhoff</a>, <a href="/search/cs?searchtype=author&query=Meireles%2C+O">Ozanan Meireles</a>, <a href="/search/cs?searchtype=author&query=Rus%2C+D">Daniela Rus</a>, <a href="/search/cs?searchtype=author&query=Rosman%2C+G">Guy Rosman</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2402.01974v1-abstract-short" style="display: inline;"> Understanding and anticipating intraoperative events and actions is critical for intraoperative assistance and decision-making during minimally invasive surgery. Automated prediction of events, actions, and the following consequences is addressed through various computational approaches with the objective of augmenting surgeons' perception and decision-making capabilities. 
We propose a predictive… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2402.01974v1-abstract-full').style.display = 'inline'; document.getElementById('2402.01974v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2402.01974v1-abstract-full" style="display: none;"> Understanding and anticipating intraoperative events and actions is critical for intraoperative assistance and decision-making during minimally invasive surgery. Automated prediction of events, actions, and the following consequences is addressed through various computational approaches with the objective of augmenting surgeons' perception and decision-making capabilities. We propose a predictive neural network that is capable of understanding and predicting critical interactive aspects of surgical workflow from intra-abdominal video, while flexibly leveraging surgical knowledge graphs. The approach incorporates a hypergraph-transformer (HGT) structure that encodes expert knowledge into the network design and predicts the hidden embedding of the graph. We verify our approach on established surgical datasets and applications, including the detection and prediction of action triplets, and the achievement of the Critical View of Safety (CVS). Moreover, we address specific, safety-related tasks, such as predicting the clipping of cystic duct or artery without prior achievement of the CVS. Our results demonstrate the superiority of our approach compared to unstructured alternatives. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2402.01974v1-abstract-full').style.display = 'none'; document.getElementById('2402.01974v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 2 February, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2024. 
arXiv:2310.17642  [pdf, other]  cs.RO cs.CV cs.LG
Drive Anywhere: Generalizable End-to-end Autonomous Driving with Multi-modal Foundation Models
Authors: Tsun-Hsuan Wang, Alaa Maalouf, Wei Xiao, Yutong Ban, Alexander Amini, Guy Rosman, Sertac Karaman, Daniela Rus
Abstract: As autonomous driving technology matures, end-to-end methodologies have emerged as a leading strategy, promising seamless integration from perception to control via deep learning. However, existing systems grapple with challenges such as unexpected open-set environments and the complexity of black-box models. At the same time, the evolution of deep learning introduces larger, multimodal foundation models, offering multi-modal visual and textual understanding. In this paper, we harness these multimodal foundation models to enhance the robustness and adaptability of autonomous driving systems, enabling out-of-distribution, end-to-end, multimodal, and more explainable autonomy. Specifically, we present an approach to apply end-to-end open-set (any environment/scene) autonomous driving that is capable of providing driving decisions from representations queryable by image and text. To do so, we introduce a method to extract nuanced spatial (pixel/patch-aligned) features from transformers to enable the encapsulation of both spatial and semantic features. Our approach (i) demonstrates unparalleled results in diverse tests while achieving significantly greater robustness in out-of-distribution situations, and (ii) allows the incorporation of latent space simulation (via text) for improved training (data augmentation via text) and policy debugging. We encourage the reader to check our explainer video at https://www.youtube.com/watch?v=4n-DJf8vXxo&feature=youtu.be and to view the code and demos on our project webpage at https://drive-anywhere.github.io/.
Submitted 26 October, 2023; originally announced October 2023.
Comments: Project webpage: https://drive-anywhere.github.io Explainer video: https://www.youtube.com/watch?v=4n-DJf8vXxo&feature=youtu.be
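The core mechanism here, patch-aligned features that can be queried by text, can be illustrated independently of any particular backbone. A minimal sketch follows, with stand-in random encoders where a real system would use a vision-language model; every component below is an assumption, not the paper's pipeline.

```python
import torch
import torch.nn.functional as F

# Stand-ins for a vision-language model's encoders (assumed, not the paper's).
def encode_patches(image: torch.Tensor) -> torch.Tensor:
    # A real backbone would return transformer patch features aligned to a
    # spatial grid; here: (grid_h * grid_w, dim) random features.
    return torch.randn(14 * 14, 512)

def encode_text(prompt: str) -> torch.Tensor:
    return torch.randn(512)

image = torch.zeros(3, 224, 224)
patches = F.normalize(encode_patches(image), dim=-1)       # (196, 512)
query = F.normalize(encode_text("pedestrian crossing"), dim=-1)

# Similarity of every spatial patch to the text query -> a 14x14 relevance map
# that a downstream driving policy could consume alongside raw features.
relevance = (patches @ query).reshape(14, 14)
print(relevance.argmax())  # most "pedestrian-like" patch index
```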
arXiv:2305.17600  [pdf, other]  cs.LG cs.CV cs.GT cs.RO math.OC
NashFormer: Leveraging Local Nash Equilibria for Semantically Diverse Trajectory Prediction
Authors: Justin Lidard, Oswin So, Yanxia Zhang, Jonathan DeCastro, Xiongyi Cui, Xin Huang, Yen-Ling Kuo, John Leonard, Avinash Balachandran, Naomi Leonard, Guy Rosman
Abstract: Interactions between road agents present a significant challenge in trajectory prediction, especially in cases involving multiple agents. Because existing diversity-aware predictors do not account for the interactive nature of multi-agent predictions, they may miss these important interaction outcomes. In this paper, we propose NashFormer, a framework for trajectory prediction that leverages game-theoretic inverse reinforcement learning to improve coverage of multi-modal predictions. We use a training-time game-theoretic analysis as an auxiliary loss, resulting in improved coverage and accuracy without presuming a taxonomy of actions for the agents. We demonstrate our approach on the interactive split of the Waymo Open Motion Dataset, including four subsets involving scenarios with high interaction complexity. Experiment results show that our predictor produces accurate predictions while covering 33% more potential interactions versus a baseline model.
Submitted 11 November, 2023; v1 submitted 27 May, 2023; originally announced May 2023.
Comments: 8 pages, 6 figures
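The "auxiliary loss for coverage" idea can be sketched generically: a winner-take-all regression term keeps the best mode accurate, while a coverage term pulls some mode toward each distinct interaction outcome. This illustrates the loss structure only, not NashFormer's game-theoretic analysis; the outcome labels and weight are assumptions.

```python
import torch

def coverage_loss(pred_modes, gt_future, outcome_futures, lam=0.1):
    """pred_modes: (K, T, 2); gt_future: (T, 2);
    outcome_futures: (M, T, 2) distinct interaction outcomes (in the paper
    these come from a training-time equilibrium analysis; here just given)."""
    # Winner-take-all: only the closest mode is regressed to the ground truth.
    dists = (pred_modes - gt_future).norm(dim=-1).mean(dim=-1)  # (K,)
    wta = dists.min()
    # Coverage: every candidate outcome should be near *some* predicted mode.
    d = (pred_modes[:, None] - outcome_futures[None]).norm(dim=-1).mean(-1)  # (K, M)
    cover = d.min(dim=0).values.mean()
    return wta + lam * cover

loss = coverage_loss(torch.randn(6, 30, 2), torch.randn(30, 2),
                     torch.randn(3, 30, 2))
```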
arXiv:2305.14797  [pdf, other]  cs.RO
Multi-Abstractive Neural Controller: An Efficient Hierarchical Control Architecture for Interactive Driving
Authors: Xiao Li, Igor Gilitschenski, Guy Rosman, Sertac Karaman, Daniela Rus
Abstract: As learning-based methods make their way from perception systems to planning/control stacks, robot control systems have started to enjoy the benefits that data-driven methods provide. Because control systems directly affect the motion of the robot, data-driven methods, especially black-box approaches, need to be used with caution, considering aspects such as stability and interpretability. In this paper, we describe a differentiable and hierarchical control architecture. The proposed representation, called the multi-abstractive neural controller, uses the input image to control the transitions within a novel discrete behavior planner (referred to as the visual automaton generative network, or vAGN). The output of a vAGN controls the parameters of a set of dynamic movement primitives that provide the system controls. We train this neural controller with real-world driving data via behavior cloning and show improved explainability, sample efficiency, and similarity to human driving.
Submitted 24 May, 2023; originally announced May 2023.
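Dynamic movement primitives, the low level of this hierarchy, have a standard form that is easy to sketch: a stable second-order system drawn toward a goal, with the goal (and, in the paper, other parameters) selected by the discrete behavior mode. Below is the textbook DMP transformation system without a forcing term; the mode-to-goal mapping is invented for illustration.

```python
import numpy as np

def dmp_rollout(y0, goal, T=100, dt=0.01, alpha=25.0, beta=6.25, tau=1.0):
    """Integrate a 1-D dynamic movement primitive toward `goal`."""
    y, z, traj = y0, 0.0, []
    for _ in range(T):
        z += dt * alpha * (beta * (goal - y) - z) / tau  # spring-damper pull
        y += dt * z / tau
        traj.append(y)
    return np.array(traj)

# Hypothetical mapping from a discrete behavior mode (e.g., a vAGN state)
# to the DMP goal: lateral offset for lane keeping vs. a lane change.
mode_goal = {"keep_lane": 0.0, "change_left": 3.5}
trajectory = dmp_rollout(y0=0.0, goal=mode_goal["change_left"])
```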
</p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">8 pages, 6 figures</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2305.14797">arXiv:2305.14797</a> <span> [<a href="https://arxiv.org/pdf/2305.14797">pdf</a>, <a href="https://arxiv.org/format/2305.14797">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Robotics">cs.RO</span> </div> </div> <p class="title is-5 mathjax"> Multi-Abstractive Neural Controller: An Efficient Hierarchical Control Architecture for Interactive Driving </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Li%2C+X">Xiao Li</a>, <a href="/search/cs?searchtype=author&query=Gilitschenski%2C+I">Igor Gilitschenski</a>, <a href="/search/cs?searchtype=author&query=Rosman%2C+G">Guy Rosman</a>, <a href="/search/cs?searchtype=author&query=Karaman%2C+S">Sertac Karaman</a>, <a href="/search/cs?searchtype=author&query=Rus%2C+D">Daniela Rus</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2305.14797v1-abstract-short" style="display: inline;"> As learning-based methods make their way from perception systems to planning/control stacks, robot control systems have started to enjoy the benefits that data-driven methods provide. Because control systems directly affect the motion of the robot, data-driven methods, especially black box approaches, need to be used with caution considering aspects such as stability and interpretability. In this… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2305.14797v1-abstract-full').style.display = 'inline'; document.getElementById('2305.14797v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2305.14797v1-abstract-full" style="display: none;"> As learning-based methods make their way from perception systems to planning/control stacks, robot control systems have started to enjoy the benefits that data-driven methods provide. Because control systems directly affect the motion of the robot, data-driven methods, especially black box approaches, need to be used with caution considering aspects such as stability and interpretability. In this paper, we describe a differentiable and hierarchical control architecture. The proposed representation, called \textit{multi-abstractive neural controller}, uses the input image to control the transitions within a novel discrete behavior planner (referred to as the visual automaton generative network, or \textit{vAGN}). The output of a vAGN controls the parameters of a set of dynamic movement primitives which provides the system controls. We train this neural controller with real-world driving data via behavior cloning and show improved explainability, sample efficiency, and similarity to human driving. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2305.14797v1-abstract-full').style.display = 'none'; document.getElementById('2305.14797v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 24 May, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> May 2023. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2303.17010">arXiv:2303.17010</a> <span> [<a href="https://arxiv.org/pdf/2303.17010">pdf</a>, <a href="https://arxiv.org/format/2303.17010">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Formal Languages and Automata Theory">cs.FL</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Robotics">cs.RO</span> </div> </div> <p class="title is-5 mathjax"> Specification-Guided Data Aggregation for Semantically Aware Imitation Learning </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Shah%2C+A">Ameesh Shah</a>, <a href="/search/cs?searchtype=author&query=DeCastro%2C+J">Jonathan DeCastro</a>, <a href="/search/cs?searchtype=author&query=Gideon%2C+J">John Gideon</a>, <a href="/search/cs?searchtype=author&query=Yalcinkaya%2C+B">Beyazit Yalcinkaya</a>, <a href="/search/cs?searchtype=author&query=Rosman%2C+G">Guy Rosman</a>, <a href="/search/cs?searchtype=author&query=Seshia%2C+S+A">Sanjit A. Seshia</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2303.17010v1-abstract-short" style="display: inline;"> Advancements in simulation and formal methods-guided environment sampling have enabled the rigorous evaluation of machine learning models in a number of safety-critical scenarios, such as autonomous driving. Application of these environment sampling techniques towards improving the learned models themselves has yet to be fully exploited. In this work, we introduce a novel method for improving imit… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2303.17010v1-abstract-full').style.display = 'inline'; document.getElementById('2303.17010v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2303.17010v1-abstract-full" style="display: none;"> Advancements in simulation and formal methods-guided environment sampling have enabled the rigorous evaluation of machine learning models in a number of safety-critical scenarios, such as autonomous driving. Application of these environment sampling techniques towards improving the learned models themselves has yet to be fully exploited. In this work, we introduce a novel method for improving imitation-learned models in a semantically aware fashion by leveraging specification-guided sampling techniques as a means of aggregating expert data in new environments. 
arXiv:2207.09619  [pdf, other]  cs.HC cs.AI cs.RO
Learning Latent Traits for Simulated Cooperative Driving Tasks
Authors: Jonathan A. DeCastro, Deepak Gopinath, Guy Rosman, Emily Sumner, Shabnam Hakimi, Simon Stent
Abstract: Constructing effective teaming strategies between humans and AI systems in complex, risky situations requires an understanding of individual preferences and behaviors of humans. Previously this problem has been treated in case-specific or data-agnostic ways. In this paper, we build a framework capable of capturing a compact latent representation of the human in terms of their behavior and preferences based on data from a simulated population of drivers. Our framework leverages, to the extent available, knowledge of individual preferences and types from samples within the population to deploy interaction policies appropriate for specific drivers. We then build a lightweight simulation environment, HMIway-env, for modelling one form of distracted driving behavior, and use it to generate data for different driver types and train intervention policies. We finally use this environment to quantify both the ability to discriminate drivers and the effectiveness of intervention policies.
Submitted 19 July, 2022; originally announced July 2022.
arXiv:2203.04421  [pdf, other]  cs.LG cs.RO
Leveraging Smooth Attention Prior for Multi-Agent Trajectory Prediction
Authors: Zhangjie Cao, Erdem Bıyık, Guy Rosman, Dorsa Sadigh
Abstract: Multi-agent interactions are important to model for forecasting other agents' behaviors and trajectories. At a certain time, to forecast a reasonable future trajectory, each agent needs to pay attention to the interactions with only a small group of the most relevant agents instead of unnecessarily paying attention to all the other agents. However, existing attention modeling works ignore that human attention in driving does not change rapidly, and may introduce fluctuating attention across time steps. In this paper, we formulate an attention model for multi-agent interactions based on a total variation temporal smoothness prior and propose a trajectory prediction architecture that leverages the knowledge of these attended interactions. We demonstrate how the total variation attention prior, along with the new sequence prediction loss terms, leads to smoother attention and more sample-efficient learning of multi-agent trajectory prediction, and show its advantages in terms of prediction accuracy by comparing it with state-of-the-art approaches on both synthetic and naturalistic driving data. We demonstrate the performance of our algorithm for trajectory prediction on the INTERACTION dataset on our website.
Submitted 19 March, 2022; v1 submitted 8 March, 2022; originally announced March 2022.
Comments: 8 pages
Journal ref: ICRA 2022
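The total variation prior itself is essentially one line: penalize the absolute change of attention weights between consecutive time steps. A minimal sketch follows; tensor shapes and the weight lam are assumptions, and the real model applies this inside a full predictor.

```python
import torch

def total_variation(attn: torch.Tensor) -> torch.Tensor:
    """attn: (batch, time, n_agents) attention weights over other agents.
    Returns the mean absolute change between consecutive time steps."""
    return (attn[:, 1:] - attn[:, :-1]).abs().mean()

attn = torch.softmax(torch.randn(4, 20, 7, requires_grad=True), dim=-1)
prediction_loss = torch.tensor(0.0)  # stand-in for the usual ADE/FDE terms
lam = 0.1                            # assumed smoothness weight
loss = prediction_loss + lam * total_variation(attn)
loss.backward()
```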
arXiv:2202.13402  [pdf, other]  cs.CV
Concept Graph Neural Networks for Surgical Video Understanding
Authors: Yutong Ban, Jennifer A. Eckhoff, Thomas M. Ward, Daniel A. Hashimoto, Ozanan R. Meireles, Daniela Rus, Guy Rosman
Abstract: We constantly integrate our knowledge and understanding of the world to enhance our interpretation of what we see. This ability is crucial in application domains which entail reasoning about multiple entities and concepts, such as AI-augmented surgery. In this paper, we propose a novel way of integrating conceptual knowledge into temporal analysis tasks via temporal concept graph networks. In the proposed networks, a global knowledge graph is incorporated into the temporal analysis of surgical instances, learning the meaning of concepts and relations as they apply to the data. We demonstrate our results on surgical video data for tasks such as verification of the critical view of safety, as well as estimation of the Parkland grading scale. The results show that our method improves the recognition and detection of complex benchmarks and enables other analytic applications of interest.
Submitted 25 April, 2023; v1 submitted 27 February, 2022; originally announced February 2022.
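A generic way to incorporate a global knowledge graph into temporal analysis is to run message passing over concept nodes and let each video frame attend to the resulting concept embeddings. The sketch below shows that fusion pattern with a plain GCN-style hop; it is illustrative, not the paper's architecture, and the toy graph and sizes are assumptions.

```python
import torch
import torch.nn as nn

class ConceptFusion(nn.Module):
    """Sketch: propagate concept embeddings over a fixed graph, then let each
    video frame attend to the concepts."""

    def __init__(self, n_concepts: int, dim: int, adj: torch.Tensor):
        super().__init__()
        self.concepts = nn.Parameter(torch.randn(n_concepts, dim))
        # Row-normalized adjacency (with self-loops) for one GCN-style hop.
        a = adj + torch.eye(n_concepts)
        self.register_buffer("adj", a / a.sum(dim=1, keepdim=True))
        self.gcn = nn.Linear(dim, dim)
        self.attn = nn.MultiheadAttention(dim, num_heads=4, batch_first=True)

    def forward(self, frames: torch.Tensor):
        # frames: (batch, time, dim) video features.
        c = torch.relu(self.gcn(self.adj @ self.concepts))  # (n_concepts, dim)
        c = c.unsqueeze(0).expand(frames.shape[0], -1, -1)
        fused, _ = self.attn(frames, c, c)  # frames query the concept graph
        return frames + fused

adj = torch.tensor([[0., 1., 0.], [1., 0., 1.], [0., 1., 0.]])  # toy graph
model = ConceptFusion(n_concepts=3, dim=32, adj=adj)
out = model(torch.randn(2, 10, 32))
```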
arXiv:2110.09741  [pdf, other]  cs.RO cs.AI cs.CL cs.LG
Trajectory Prediction with Linguistic Representations
Authors: Yen-Ling Kuo, Xin Huang, Andrei Barbu, Stephen G. McGill, Boris Katz, John J. Leonard, Guy Rosman
Abstract: Language allows humans to build mental models that interpret what is happening around them, resulting in more accurate long-term predictions. We present a novel trajectory prediction model that uses linguistic intermediate representations to forecast trajectories, and is trained using trajectory samples with partially-annotated captions. The model learns the meaning of each of the words without direct per-word supervision. At inference time, it generates a linguistic description of trajectories which captures maneuvers and interactions over an extended time interval. This generated description is used to refine predictions of the trajectories of multiple agents. We train and validate our model on the Argoverse dataset, and demonstrate improved accuracy results in trajectory prediction. In addition, our model is more interpretable: it presents part of its reasoning in plain language as captions, which can aid model development and can aid in building confidence in the model before deploying it.
Submitted 9 March, 2022; v1 submitted 19 October, 2021; originally announced October 2021.
Comments: Accepted in ICRA 2022
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2203.04421v2-abstract-full').style.display = 'none'; document.getElementById('2203.04421v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 19 March, 2022; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 8 March, 2022; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> March 2022. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">8 pages</span> </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Journal ref:</span> ICRA 2022 </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2202.13402">arXiv:2202.13402</a> <span> [<a href="https://arxiv.org/pdf/2202.13402">pdf</a>, <a href="https://arxiv.org/format/2202.13402">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> Concept Graph Neural Networks for Surgical Video Understanding </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Ban%2C+Y">Yutong Ban</a>, <a href="/search/cs?searchtype=author&query=Eckhoff%2C+J+A">Jennifer A. Eckhoff</a>, <a href="/search/cs?searchtype=author&query=Ward%2C+T+M">Thomas M. Ward</a>, <a href="/search/cs?searchtype=author&query=Hashimoto%2C+D+A">Daniel A. Hashimoto</a>, <a href="/search/cs?searchtype=author&query=Meireles%2C+O+R">Ozanan R. Meireles</a>, <a href="/search/cs?searchtype=author&query=Rus%2C+D">Daniela Rus</a>, <a href="/search/cs?searchtype=author&query=Rosman%2C+G">Guy Rosman</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2202.13402v2-abstract-short" style="display: inline;"> We constantly integrate our knowledge and understanding of the world to enhance our interpretation of what we see. This ability is crucial in application domains which entail reasoning about multiple entities and concepts, such as AI-augmented surgery. In this paper, we propose a novel way of integrating conceptual knowledge into temporal analysis tasks via temporal concept graph networks. In th… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2202.13402v2-abstract-full').style.display = 'inline'; document.getElementById('2202.13402v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2202.13402v2-abstract-full" style="display: none;"> We constantly integrate our knowledge and understanding of the world to enhance our interpretation of what we see. This ability is crucial in application domains which entail reasoning about multiple entities and concepts, such as AI-augmented surgery. In this paper, we propose a novel way of integrating conceptual knowledge into temporal analysis tasks via temporal concept graph networks. 
arXiv:2110.08610  [pdf, other]  cs.HC cs.CV cs.LG cs.RO
MAAD: A Model and Dataset for "Attended Awareness" in Driving
Authors: Deepak Gopinath, Guy Rosman, Simon Stent, Katsuya Terahata, Luke Fletcher, Brenna Argall, John Leonard
Abstract: We propose a computational model to estimate a person's attended awareness of their environment. We define attended awareness to be those parts of a potentially dynamic scene which a person has attended to in recent history and which they are still likely to be physically aware of. Our model takes as input scene information in the form of a video and noisy gaze estimates, and outputs visual saliency, a refined gaze estimate, and an estimate of the person's attended awareness. To test our model, we capture a new dataset with a high-precision gaze tracker, including 24.5 hours of gaze sequences from 23 subjects attending to videos of driving scenes. The dataset also contains third-party annotations of the subjects' attended awareness based on observations of their scan path. Our results show that our model is able to reasonably estimate attended awareness in a controlled setting, and could in the future be extended to real egocentric driving data to help enable more effective ahead-of-time warnings in safety systems and thereby augment driver performance. We also demonstrate our model's effectiveness on the tasks of saliency, gaze calibration, and denoising, using both our dataset and an existing saliency dataset. We make our model and dataset available at https://github.com/ToyotaResearchInstitute/att-aware/.
Submitted 16 October, 2021; originally announced October 2021.
Comments: 25 pages, 13 figures, 14 tables. Accepted at EPIC@ICCV 2021 Workshop. Main paper + Supplementary Material
arXiv:2110.02344  [pdf, other]  cs.RO cs.AI cs.LG
HYPER: Learned Hybrid Trajectory Prediction via Factored Inference and Adaptive Sampling
Authors: Xin Huang, Guy Rosman, Igor Gilitschenski, Ashkan Jasour, Stephen G. McGill, John J. Leonard, Brian C. Williams
Abstract: Modeling multi-modal high-level intent is important for ensuring diversity in trajectory prediction. Existing approaches explore the discrete nature of human intent before predicting continuous trajectories, to improve accuracy and support explainability. However, these approaches often assume the intent to remain fixed over the prediction horizon, which is problematic in practice, especially over longer horizons. To overcome this limitation, we introduce HYPER, a general and expressive hybrid prediction framework that models evolving human intent. By modeling traffic agents as a hybrid discrete-continuous system, our approach is capable of predicting discrete intent changes over time. We learn the probabilistic hybrid model via a maximum likelihood estimation problem and leverage neural proposal distributions to sample adaptively from the exponentially growing discrete space. The overall approach affords a better trade-off between accuracy and coverage. We train and validate our model on the Argoverse dataset, and demonstrate its effectiveness through comprehensive ablation studies and comparisons with state-of-the-art models.
Submitted 5 October, 2021; originally announced October 2021.
Comments: 12 pages, 10 figures, 4 tables
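Why a proposal distribution? With I possible intents per step and T steps there are I^T intent sequences, so methods of this kind sample sequences from a learned proposal instead of enumerating the space. A minimal sketch of that sampling step; the proposal here is a made-up stickiness rule, not a learned network.

```python
import torch

def sample_intent_sequences(n_samples=8, horizon=6, n_intents=3):
    """Sample intent sequences step by step from a proposal distribution.
    A learned model would condition the logits on scene context; here the
    proposal simply favors keeping the previous intent (sticky intents)."""
    seqs = torch.zeros(n_samples, horizon, dtype=torch.long)
    logps = torch.zeros(n_samples)
    intent = torch.zeros(n_samples, dtype=torch.long)
    for t in range(horizon):
        logits = torch.full((n_samples, n_intents), 0.0)
        logits.scatter_(1, intent[:, None], 2.0)  # stickiness bonus
        dist = torch.distributions.Categorical(logits=logits)
        intent = dist.sample()
        logps += dist.log_prob(intent)            # kept for reweighting later
        seqs[:, t] = intent
    return seqs, logps

seqs, logps = sample_intent_sequences()
print(seqs[logps.argmax()])  # most probable sampled intent sequence
```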
arXiv:2108.01851  [pdf, other]  cs.LG cs.AI cs.RO
Risk Conditioned Neural Motion Planning
Authors: Xin Huang, Meng Feng, Ashkan Jasour, Guy Rosman, Brian Williams
Abstract: Risk-bounded motion planning is an important yet difficult problem for safety-critical tasks. While existing mathematical programming methods offer theoretical guarantees in the context of constrained Markov decision processes, they either lack scalability in solving larger problems or produce conservative plans. Recent advances in deep reinforcement learning improve scalability by learning policy networks as function approximators. In this paper, we propose an extension of the soft actor-critic model to estimate the execution risk of a plan through a risk critic, and to produce risk-bounded policies efficiently by adding an extra risk term in the loss function of the policy network. We define the execution risk in an accurate form, as opposed to approximating it through a summation of immediate risks at each time step, which leads to conservative plans. Our proposed model is conditioned on a continuous spectrum of risk bounds, allowing the user to adjust the risk-averse level of the agent on the fly. Through a set of experiments, we show the advantage of our model in terms of both computational time and plan quality, compared to a state-of-the-art mathematical programming baseline, and validate its performance in more complicated scenarios, including nonlinear dynamics and larger state spaces.
Submitted 4 August, 2021; originally announced August 2021.
Comments: Accepted at IROS'21. Author version with 7 pages, 5 figures, 2 tables, and 1 algorithm
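The actor update described here is standard SAC plus a penalty from a risk critic, with the risk bound fed to the policy as an input so one network serves a spectrum of bounds. A minimal sketch of just the actor loss; the sizes, the penalty weight, and the hinge form of the penalty are assumptions.

```python
import torch
import torch.nn.functional as F

def actor_loss(q_value, log_prob, risk_estimate, risk_bound,
               alpha=0.2, lam=10.0):
    """Sketch of a risk-conditioned SAC actor loss.
    q_value:       Q(s, a) from the reward critic
    log_prob:      log pi(a | s, risk_bound), policy conditioned on the bound
    risk_estimate: execution risk of (s, a) from the risk critic
    risk_bound:    user-chosen bound in [0, 1], sampled during training
    Only risk exceeding the bound is penalized (assumed hinge form)."""
    sac_term = (alpha * log_prob - q_value).mean()
    violation = F.relu(risk_estimate - risk_bound).mean()
    return sac_term + lam * violation

loss = actor_loss(q_value=torch.randn(32),
                  log_prob=torch.randn(32),
                  risk_estimate=torch.rand(32),
                  risk_bound=torch.rand(32))
```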
</p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">9 pages, 5 figures, 5 tables</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2110.08610">arXiv:2110.08610</a> <span> [<a href="https://arxiv.org/pdf/2110.08610">pdf</a>, <a href="https://arxiv.org/format/2110.08610">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Human-Computer Interaction">cs.HC</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Robotics">cs.RO</span> </div> </div> <p class="title is-5 mathjax"> MAAD: A Model and Dataset for "Attended Awareness" in Driving </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Gopinath%2C+D">Deepak Gopinath</a>, <a href="/search/cs?searchtype=author&query=Rosman%2C+G">Guy Rosman</a>, <a href="/search/cs?searchtype=author&query=Stent%2C+S">Simon Stent</a>, <a href="/search/cs?searchtype=author&query=Terahata%2C+K">Katsuya Terahata</a>, <a href="/search/cs?searchtype=author&query=Fletcher%2C+L">Luke Fletcher</a>, <a href="/search/cs?searchtype=author&query=Argall%2C+B">Brenna Argall</a>, <a href="/search/cs?searchtype=author&query=Leonard%2C+J">John Leonard</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2110.08610v1-abstract-short" style="display: inline;"> We propose a computational model to estimate a person's attended awareness of their environment. We define attended awareness to be those parts of a potentially dynamic scene which a person has attended to in recent history and which they are still likely to be physically aware of. Our model takes as input scene information in the form of a video and noisy gaze estimates, and outputs visual salien… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2110.08610v1-abstract-full').style.display = 'inline'; document.getElementById('2110.08610v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2110.08610v1-abstract-full" style="display: none;"> We propose a computational model to estimate a person's attended awareness of their environment. We define attended awareness to be those parts of a potentially dynamic scene which a person has attended to in recent history and which they are still likely to be physically aware of. Our model takes as input scene information in the form of a video and noisy gaze estimates, and outputs visual saliency, a refined gaze estimate, and an estimate of the person's attended awareness. In order to test our model, we capture a new dataset with a high-precision gaze tracker including 24.5 hours of gaze sequences from 23 subjects attending to videos of driving scenes. The dataset also contains third-party annotations of the subjects' attended awareness based on observations of their scan path. 
arXiv:2011.11991  [pdf, ps, other]  cs.LG cs.RO
Discovering Avoidable Planner Failures of Autonomous Vehicles using Counterfactual Analysis in Behaviorally Diverse Simulation
Authors: Daisuke Nishiyama, Mario Ynocente Castro, Shirou Maruyama, Shinya Shiroshita, Karim Hamzaoui, Yi Ouyang, Guy Rosman, Jonathan DeCastro, Kuan-Hui Lee, Adrien Gaidon
Abstract: Automated vehicles require exhaustive testing in simulation to detect as many safety-critical failures as possible before deployment on public roads. In this work, we focus on the core decision-making component of autonomous robots: their planning algorithm. We introduce a planner testing framework that leverages recent progress in simulating behaviorally diverse traffic participants. Using large-scale search, we generate, detect, and characterize dynamic scenarios leading to collisions. In particular, we propose methods to distinguish between unavoidable and avoidable accidents, focusing especially on automatically finding planner-specific defects that must be corrected before deployment. Through experiments in complex multi-agent intersection scenarios, we show that our method can indeed find a wide range of critical planner failures.
Submitted 24 November, 2020; originally announced November 2020.
Comments: 8 pages, 8 figures
Journal ref: The 23rd IEEE International Conference on Intelligent Transportation Systems (ITSC 2020)
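The avoidable/unavoidable distinction admits a simple operational sketch: replay a collision scenario under a set of alternative planner behaviors; if any alternative escapes the collision, the original failure was avoidable and points to a planner defect. Below is a toy 1-D replay with a stub simulator; the dynamics and the alternative behaviors are invented for illustration.

```python
def replay(brake_decel, ego_speed=15.0, gap=25.0, dt=0.1):
    """Toy 1-D scenario: ego approaches a stopped agent `gap` meters ahead.
    Returns True if the ego collides under the given braking behavior."""
    pos, speed = 0.0, ego_speed
    while speed > 0:
        speed = max(0.0, speed - brake_decel * dt)
        pos += speed * dt
        if pos >= gap:
            return True
    return False

def is_avoidable(planner_decel, alternatives=(4.0, 6.0, 8.0)):
    """Collision is avoidable iff some alternative behavior escapes it."""
    if not replay(planner_decel):
        return None  # the planner already avoids the collision
    return any(not replay(a) for a in alternatives)

print(is_avoidable(planner_decel=3.0))  # weak braking -> avoidable failure
```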
However, these approaches often assume the intent to remain fixed over the prediction horizon, which is problematic in practice, especially over… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2110.02344v1-abstract-full').style.display = 'inline'; document.getElementById('2110.02344v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2110.02344v1-abstract-full" style="display: none;"> Modeling multi-modal high-level intent is important for ensuring diversity in trajectory prediction. Existing approaches explore the discrete nature of human intent before predicting continuous trajectories, to improve accuracy and support explainability. However, these approaches often assume the intent to remain fixed over the prediction horizon, which is problematic in practice, especially over longer horizons. To overcome this limitation, we introduce HYPER, a general and expressive hybrid prediction framework that models evolving human intent. By modeling traffic agents as a hybrid discrete-continuous system, our approach is capable of predicting discrete intent changes over time. We learn the probabilistic hybrid model via a maximum likelihood estimation problem and leverage neural proposal distributions to sample adaptively from the exponentially growing discrete space. The overall approach affords a better trade-off between accuracy and coverage. We train and validate our model on the Argoverse dataset, and demonstrate its effectiveness through comprehensive ablation studies and comparisons with state-of-the-art models. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2110.02344v1-abstract-full').style.display = 'none'; document.getElementById('2110.02344v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 5 October, 2021; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2021. 
</p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">12 pages, 10 figures, 4 tables</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2108.01851">arXiv:2108.01851</a> <span> [<a href="https://arxiv.org/pdf/2108.01851">pdf</a>, <a href="https://arxiv.org/format/2108.01851">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Robotics">cs.RO</span> </div> </div> <p class="title is-5 mathjax"> Risk Conditioned Neural Motion Planning </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Huang%2C+X">Xin Huang</a>, <a href="/search/cs?searchtype=author&query=Feng%2C+M">Meng Feng</a>, <a href="/search/cs?searchtype=author&query=Jasour%2C+A">Ashkan Jasour</a>, <a href="/search/cs?searchtype=author&query=Rosman%2C+G">Guy Rosman</a>, <a href="/search/cs?searchtype=author&query=Williams%2C+B">Brian Williams</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2108.01851v1-abstract-short" style="display: inline;"> Risk-bounded motion planning is an important yet difficult problem for safety-critical tasks. While existing mathematical programming methods offer theoretical guarantees in the context of constrained Markov decision processes, they either lack scalability in solving larger problems or produce conservative plans. Recent advances in deep reinforcement learning improve scalability by learning policy… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2108.01851v1-abstract-full').style.display = 'inline'; document.getElementById('2108.01851v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2108.01851v1-abstract-full" style="display: none;"> Risk-bounded motion planning is an important yet difficult problem for safety-critical tasks. While existing mathematical programming methods offer theoretical guarantees in the context of constrained Markov decision processes, they either lack scalability in solving larger problems or produce conservative plans. Recent advances in deep reinforcement learning improve scalability by learning policy networks as function approximators. In this paper, we propose an extension of soft actor critic model to estimate the execution risk of a plan through a risk critic and produce risk-bounded policies efficiently by adding an extra risk term in the loss function of the policy network. We define the execution risk in an accurate form, as opposed to approximating it through a summation of immediate risks at each time step that leads to conservative plans. Our proposed model is conditioned on a continuous spectrum of risk bounds, allowing the user to adjust the risk-averse level of the agent on the fly. 
Through a set of experiments, we show the advantage of our model in terms of both computational time and plan quality, compared to a state-of-the-art mathematical programming baseline, and validate its performance in more complicated scenarios, including nonlinear dynamics and larger state space. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2108.01851v1-abstract-full').style.display = 'none'; document.getElementById('2108.01851v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 4 August, 2021; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> August 2021. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Accepted at IROS'21. Author version with 7 pages, 5 figures, 2 tables, and 1 algorithm</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2105.04642">arXiv:2105.04642</a> <span> [<a href="https://arxiv.org/pdf/2105.04642">pdf</a>, <a href="https://arxiv.org/format/2105.04642">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> SUPR-GAN: SUrgical PRediction GAN for Event Anticipation in Laparoscopic and Robotic Surgery </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Ban%2C+Y">Yutong Ban</a>, <a href="/search/cs?searchtype=author&query=Rosman%2C+G">Guy Rosman</a>, <a href="/search/cs?searchtype=author&query=Eckhoff%2C+J+A">Jennifer A. Eckhoff</a>, <a href="/search/cs?searchtype=author&query=Ward%2C+T+M">Thomas M. Ward</a>, <a href="/search/cs?searchtype=author&query=Hashimoto%2C+D+A">Daniel A. Hashimoto</a>, <a href="/search/cs?searchtype=author&query=Kondo%2C+T">Taisei Kondo</a>, <a href="/search/cs?searchtype=author&query=Iwaki%2C+H">Hidekazu Iwaki</a>, <a href="/search/cs?searchtype=author&query=Meireles%2C+O+R">Ozanan R. Meireles</a>, <a href="/search/cs?searchtype=author&query=Rus%2C+D">Daniela Rus</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2105.04642v2-abstract-short" style="display: inline;"> Comprehension of surgical workflow is the foundation upon which artificial intelligence (AI) and machine learning (ML) holds the potential to assist intraoperative decision-making and risk mitigation. In this work, we move beyond mere identification of past surgical phases, into the prediction of future surgical steps and specification of the transitions between them. We use a novel Generative Adv… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2105.04642v2-abstract-full').style.display = 'inline'; document.getElementById('2105.04642v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2105.04642v2-abstract-full" style="display: none;"> Comprehension of surgical workflow is the foundation upon which artificial intelligence (AI) and machine learning (ML) holds the potential to assist intraoperative decision-making and risk mitigation. 
In this work, we move beyond mere identification of past surgical phases, into the prediction of future surgical steps and specification of the transitions between them. We use a novel Generative Adversarial Network (GAN) formulation to sample future surgical phases trajectories conditioned on past video frames from laparoscopic cholecystectomy (LC) videos and compare it to state-of-the-art approaches for surgical video analysis and alternative prediction methods. We demonstrate the GAN formulation's effectiveness through inferring and predicting the progress of LC videos. We quantify the horizon-accuracy trade-off and explored average performance, as well as the performance on the more challenging, and clinically relevant transitions between phases. Furthermore, we conduct a survey, asking 16 surgeons of different specialties and educational levels to qualitatively evaluate predicted surgery phases. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2105.04642v2-abstract-full').style.display = 'none'; document.getElementById('2105.04642v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 9 March, 2022; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 10 May, 2021; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> May 2021. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">RA-L ICRA 2022</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2011.11991">arXiv:2011.11991</a> <span> [<a href="https://arxiv.org/pdf/2011.11991">pdf</a>, <a href="https://arxiv.org/ps/2011.11991">ps</a>, <a href="https://arxiv.org/format/2011.11991">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Robotics">cs.RO</span> </div> </div> <p class="title is-5 mathjax"> Discovering Avoidable Planner Failures of Autonomous Vehicles using Counterfactual Analysis in Behaviorally Diverse Simulation </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Nishiyama%2C+D">Daisuke Nishiyama</a>, <a href="/search/cs?searchtype=author&query=Castro%2C+M+Y">Mario Ynocente Castro</a>, <a href="/search/cs?searchtype=author&query=Maruyama%2C+S">Shirou Maruyama</a>, <a href="/search/cs?searchtype=author&query=Shiroshita%2C+S">Shinya Shiroshita</a>, <a href="/search/cs?searchtype=author&query=Hamzaoui%2C+K">Karim Hamzaoui</a>, <a href="/search/cs?searchtype=author&query=Ouyang%2C+Y">Yi Ouyang</a>, <a href="/search/cs?searchtype=author&query=Rosman%2C+G">Guy Rosman</a>, <a href="/search/cs?searchtype=author&query=DeCastro%2C+J">Jonathan DeCastro</a>, <a href="/search/cs?searchtype=author&query=Lee%2C+K">Kuan-Hui Lee</a>, <a href="/search/cs?searchtype=author&query=Gaidon%2C+A">Adrien Gaidon</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2011.11991v1-abstract-short" style="display: inline;"> Automated Vehicles require exhaustive testing in simulation to detect as many safety-critical 
arXiv:2011.11991 (https://arxiv.org/abs/2011.11991) [pdf, ps, other] cs.LG cs.RO
Discovering Avoidable Planner Failures of Autonomous Vehicles using Counterfactual Analysis in Behaviorally Diverse Simulation
Authors: Daisuke Nishiyama, Mario Ynocente Castro, Shirou Maruyama, Shinya Shiroshita, Karim Hamzaoui, Yi Ouyang, Guy Rosman, Jonathan DeCastro, Kuan-Hui Lee, Adrien Gaidon
Abstract: Automated vehicles require exhaustive testing in simulation to detect as many safety-critical failures as possible before deployment on public roads. In this work, we focus on the core decision-making component of autonomous robots: their planning algorithm. We introduce a planner testing framework that leverages recent progress in simulating behaviorally diverse traffic participants. Using large-scale search, we generate, detect, and characterize dynamic scenarios leading to collisions. In particular, we propose methods to distinguish between unavoidable and avoidable accidents, focusing especially on automatically finding planner-specific defects that must be corrected before deployment. Through experiments in complex multi-agent intersection scenarios, we show that our method can indeed find a wide range of critical planner failures.
Submitted 24 November, 2020; originally announced November 2020.
</p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">8 pages, 8 figures</span> </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Journal ref:</span> The 23rd IEEE International Conference on Intelligent Transportation Systems (ITSC2020) </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2011.05741">arXiv:2011.05741</a> <span> [<a href="https://arxiv.org/pdf/2011.05741">pdf</a>, <a href="https://arxiv.org/ps/2011.05741">ps</a>, <a href="https://arxiv.org/format/2011.05741">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Robotics">cs.RO</span> </div> </div> <p class="title is-5 mathjax"> Behaviorally Diverse Traffic Simulation via Reinforcement Learning </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Shiroshita%2C+S">Shinya Shiroshita</a>, <a href="/search/cs?searchtype=author&query=Maruyama%2C+S">Shirou Maruyama</a>, <a href="/search/cs?searchtype=author&query=Nishiyama%2C+D">Daisuke Nishiyama</a>, <a href="/search/cs?searchtype=author&query=Castro%2C+M+Y">Mario Ynocente Castro</a>, <a href="/search/cs?searchtype=author&query=Hamzaoui%2C+K">Karim Hamzaoui</a>, <a href="/search/cs?searchtype=author&query=Rosman%2C+G">Guy Rosman</a>, <a href="/search/cs?searchtype=author&query=DeCastro%2C+J">Jonathan DeCastro</a>, <a href="/search/cs?searchtype=author&query=Lee%2C+K">Kuan-Hui Lee</a>, <a href="/search/cs?searchtype=author&query=Gaidon%2C+A">Adrien Gaidon</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2011.05741v1-abstract-short" style="display: inline;"> Traffic simulators are important tools in autonomous driving development. While continuous progress has been made to provide developers more options for modeling various traffic participants, tuning these models to increase their behavioral diversity while maintaining quality is often very challenging. This paper introduces an easily-tunable policy generation algorithm for autonomous driving agent… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2011.05741v1-abstract-full').style.display = 'inline'; document.getElementById('2011.05741v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2011.05741v1-abstract-full" style="display: none;"> Traffic simulators are important tools in autonomous driving development. While continuous progress has been made to provide developers more options for modeling various traffic participants, tuning these models to increase their behavioral diversity while maintaining quality is often very challenging. This paper introduces an easily-tunable policy generation algorithm for autonomous driving agents. The proposed algorithm balances diversity and driving skills by leveraging the representation and exploration abilities of deep reinforcement learning via a distinct policy set selector. Moreover, we present an algorithm utilizing intrinsic rewards to widen behavioral differences in the training. 
arXiv:2011.05741 (https://arxiv.org/abs/2011.05741) [pdf, ps, other] cs.LG cs.RO
Behaviorally Diverse Traffic Simulation via Reinforcement Learning
Authors: Shinya Shiroshita, Shirou Maruyama, Daisuke Nishiyama, Mario Ynocente Castro, Karim Hamzaoui, Guy Rosman, Jonathan DeCastro, Kuan-Hui Lee, Adrien Gaidon
Abstract: Traffic simulators are important tools in autonomous driving development. While continuous progress has been made to give developers more options for modeling various traffic participants, tuning these models to increase their behavioral diversity while maintaining quality is often very challenging. This paper introduces an easily tunable policy-generation algorithm for autonomous driving agents. The proposed algorithm balances diversity and driving skill by leveraging the representation and exploration abilities of deep reinforcement learning via a distinct policy set selector. Moreover, we present an algorithm that utilizes intrinsic rewards to widen behavioral differences during training. To provide quantitative assessments, we develop two trajectory-based evaluation metrics that measure the differences among policies and the behavioral coverage. We experimentally show the effectiveness of our methods on several challenging intersection scenes.
Submitted 11 November, 2020; originally announced November 2020.
Comments: 8 pages, 16 figures
Journal ref: IEEE/RSJ International Conference on Intelligent Robots and Systems (IROS), 2020, pp. 2103-2110
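The two metrics are not spelled out in the abstract, but "trajectory-based" suggests shapes like the following sketch: an inter-policy distance over rollouts and a spatial coverage count. The equal-length (T, 2) trajectory format and grid-cell coverage are our own assumptions.

```python
import numpy as np

def policy_distance(trajs_a, trajs_b):
    """Illustrative inter-policy distance: mean pairwise Euclidean distance between
    rollouts of two policies (each trajectory an equal-length (T, 2) xy array)."""
    return float(np.mean([np.linalg.norm(ta - tb, axis=1).mean()
                          for ta in trajs_a for tb in trajs_b]))

def behavioral_coverage(trajs, cell=2.0):
    """Illustrative coverage metric: number of distinct (cell x cell metre)
    spatial cells visited across all rollouts of a policy set."""
    visited = {tuple(np.floor(p / cell).astype(int)) for t in trajs for p in t}
    return len(visited)
```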
arXiv:2009.07517 (https://arxiv.org/abs/2009.07517) [pdf, other] cs.RO cs.HC cs.LG eess.SY
MATS: An Interpretable Trajectory Forecasting Representation for Planning and Control
Authors: Boris Ivanovic, Amine Elhafsi, Guy Rosman, Adrien Gaidon, Marco Pavone
Abstract: Reasoning about human motion is a core component of modern human-robot interactive systems. In particular, one of the main uses of behavior prediction in autonomous systems is to inform robot motion planning and control. However, most planning and control algorithms reason about system dynamics rather than the predicted agent tracklets (i.e., ordered sets of waypoints) that trajectory forecasting methods commonly output, which can hinder their integration. To this end, we propose Mixtures of Affine Time-varying Systems (MATS) as an output representation for trajectory forecasting that is more amenable to downstream planning and control use. Our approach leverages successful ideas from probabilistic trajectory forecasting to learn dynamical-system representations that are well studied in the planning and control literature. We integrate our predictions with a proposed multimodal planning methodology and demonstrate significant computational efficiency improvements on a large-scale autonomous driving dataset.
Submitted 14 January, 2021; v1 submitted 16 September, 2020; originally announced September 2020.
Comments: 14 pages, 6 figures, 1 table. All code, models, and data can be found at https://github.com/StanfordASL/MATS . Conference on Robot Learning (CoRL) 2020
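Per its name, MATS predicts dynamics rather than waypoints: each mode is a time-varying affine system x_{t+1} = A_t x_t + B_t u_t + c_t with a mixture weight. A rollout under such a representation could look like the sketch below; the function signature is ours, and the released code at the GitHub link above is the authoritative version.

```python
import numpy as np

def mats_rollout(x0, controls, modes, weights):
    """Roll out a mixture of affine time-varying systems (illustrative sketch).
    Each mode is a sequence of (A_t, B_t, c_t) triples, i.e. within a mode
        x_{t+1} = A_t @ x_t + B_t @ u_t + c_t,
    and weights[m] is the probability of dynamics mode m."""
    trajectories = []
    for mode in modes:
        x, traj = x0.copy(), [x0.copy()]
        for (A, B, c), u in zip(mode, controls):
            x = A @ x + B @ u + c
            traj.append(x)
        trajectories.append(np.stack(traj))
    return trajectories, weights  # one state trajectory per affine mode
```

Because each mode is an explicit linear system, a downstream planner can consume it directly, which is the integration advantage the abstract claims.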
arXiv:2009.00681 (https://arxiv.org/abs/2009.00681) [pdf, other] cs.CV cs.AI
Aggregating Long-Term Context for Learning Laparoscopic and Robot-Assisted Surgical Workflows
Authors: Yutong Ban, Guy Rosman, Thomas Ward, Daniel Hashimoto, Taisei Kondo, Hidekazu Iwaki, Ozanan Meireles, Daniela Rus
Abstract: Analyzing surgical workflow is crucial for surgical assistance robots to understand surgeries. With an understanding of the complete surgical workflow, a robot can assist the surgeon during intra-operative events, for example by issuing a warning when the surgeon enters key or high-risk phases. Deep learning techniques have recently been widely applied to recognizing surgical workflows. Many existing temporal neural network models are limited in their capability to handle long-term dependencies in the data, relying instead on the strong performance of the underlying per-frame visual models. We propose a new temporal network structure that leverages a task-specific network representation to collect long-term sufficient statistics, which are propagated by a sufficient statistics model (SSM). We implement our approach within an LSTM backbone for the task of surgical phase recognition and explore several choices of propagated statistics. We demonstrate superior results over existing state-of-the-art segmentation techniques on two laparoscopic cholecystectomy datasets: the publicly available Cholec80 dataset and MGH100, a novel dataset with more challenging and clinically meaningful segment labels.
Submitted 10 May, 2021; v1 submitted 1 September, 2020; originally announced September 2020.
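The abstract leaves the propagated statistics open ("several choices"). One concrete reading is sketched below, using a running mean of past frame features as the long-term statistic concatenated to the LSTM input; this is our guess at the structure, not the paper's model.

```python
import torch
import torch.nn as nn

class PhaseLSTMWithStats(nn.Module):
    """Sketch: an LSTM phase recognizer whose input is augmented with a propagated
    long-term statistic (here, a running mean of all past frame features)."""
    def __init__(self, feat_dim=512, hidden=256, n_phases=7):
        super().__init__()
        self.lstm = nn.LSTM(2 * feat_dim, hidden, batch_first=True)
        self.head = nn.Linear(hidden, n_phases)

    def forward(self, feats):
        # feats: (B, T, feat_dim); the cumulative mean summarizes the whole past,
        # reaching far beyond the effective memory of the LSTM state itself
        steps = torch.arange(1, feats.size(1) + 1, device=feats.device).view(1, -1, 1)
        running_mean = torch.cumsum(feats, dim=1) / steps
        out, _ = self.lstm(torch.cat([feats, running_mean], dim=-1))
        return self.head(out)  # per-frame phase logits
```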
arXiv:2008.12969 (https://arxiv.org/abs/2008.12969) [pdf, other] cs.CV cs.LG cs.RO
Driving Through Ghosts: Behavioral Cloning with False Positives
Authors: Andreas Bühler, Adrien Gaidon, Andrei Cramariuc, Rares Ambrus, Guy Rosman, Wolfram Burgard
Abstract: Safe autonomous driving requires robust detection of other traffic participants. However, robust does not mean perfect, and safe systems typically minimize missed detections at the expense of a higher false-positive rate. This results in conservative and yet potentially dangerous behavior, such as avoiding imaginary obstacles. In the context of behavioral cloning, perceptual errors at training time can lead to learning difficulties or wrong policies, as expert demonstrations might be inconsistent with the perceived world state. In this work, we propose a behavioral cloning approach that can safely leverage imperfect perception without being conservative. Our core contribution is a novel representation of perceptual uncertainty for learning to plan. We propose a new probabilistic bird's-eye-view semantic grid to encode the noisy output of object perception systems. We then leverage expert demonstrations to learn an imitative driving policy using this probabilistic representation. Using the CARLA simulator, we show that our approach can safely overcome critical false positives that would otherwise lead to catastrophic failures or conservative behavior.
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2008.12969v1-abstract-full').style.display = 'none'; document.getElementById('2008.12969v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 29 August, 2020; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> August 2020. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">7 pages, 5 figures, 4 tables, accepted at 2020 IEEE/RSJ International Conference on Intelligent Robots and Systems (IROS 2020)</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2007.00178">arXiv:2007.00178</a> <span> [<a href="https://arxiv.org/pdf/2007.00178">pdf</a>, <a href="https://arxiv.org/format/2007.00178">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Robotics">cs.RO</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Systems and Control">eess.SY</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">stat.ML</span> </div> </div> <p class="title is-5 mathjax"> Reinforcement Learning based Control of Imitative Policies for Near-Accident Driving </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Cao%2C+Z">Zhangjie Cao</a>, <a href="/search/cs?searchtype=author&query=B%C4%B1y%C4%B1k%2C+E">Erdem B谋y谋k</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+W+Z">Woodrow Z. Wang</a>, <a href="/search/cs?searchtype=author&query=Raventos%2C+A">Allan Raventos</a>, <a href="/search/cs?searchtype=author&query=Gaidon%2C+A">Adrien Gaidon</a>, <a href="/search/cs?searchtype=author&query=Rosman%2C+G">Guy Rosman</a>, <a href="/search/cs?searchtype=author&query=Sadigh%2C+D">Dorsa Sadigh</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2007.00178v1-abstract-short" style="display: inline;"> Autonomous driving has achieved significant progress in recent years, but autonomous cars are still unable to tackle high-risk situations where a potential accident is likely. In such near-accident scenarios, even a minor change in the vehicle's actions may result in drastically different consequences. To avoid unsafe actions in near-accident scenarios, we need to fully explore the environment. Ho… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2007.00178v1-abstract-full').style.display = 'inline'; document.getElementById('2007.00178v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2007.00178v1-abstract-full" style="display: none;"> Autonomous driving has achieved significant progress in recent years, but autonomous cars are still unable to tackle high-risk situations where a potential accident is likely. 
arXiv:2007.00178 (https://arxiv.org/abs/2007.00178) [pdf, other] cs.LG cs.AI cs.RO eess.SY stat.ML
Reinforcement Learning based Control of Imitative Policies for Near-Accident Driving
Authors: Zhangjie Cao, Erdem Bıyık, Woodrow Z. Wang, Allan Raventos, Adrien Gaidon, Guy Rosman, Dorsa Sadigh
Abstract: Autonomous driving has achieved significant progress in recent years, but autonomous cars are still unable to tackle high-risk situations where a potential accident is likely. In such near-accident scenarios, even a minor change in the vehicle's actions may result in drastically different consequences. To avoid unsafe actions in near-accident scenarios, we need to fully explore the environment. However, reinforcement learning (RL) and imitation learning (IL), two widely used policy-learning methods, cannot model rapid phase transitions and do not scale to fully cover all the states. To address driving in near-accident scenarios, we propose a hierarchical reinforcement and imitation learning (H-ReIL) approach that consists of low-level policies learned with IL for discrete driving modes and a high-level policy learned with RL that switches between the driving modes. Our approach exploits the advantages of both IL and RL by integrating them into a unified learning framework. Experimental results and user studies suggest that our approach can achieve higher efficiency and safety than other methods. Analyses of the policies demonstrate that our high-level policy appropriately switches between different low-level policies in near-accident driving situations.
Submitted 30 June, 2020; originally announced July 2020.
Comments: 10 pages, 7 figures. Published at Robotics: Science and Systems (RSS) 2020
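The hierarchy in H-ReIL is easy to see in code: IL supplies one policy per discrete driving mode, and RL supplies the switch between them. The class below is a structural sketch with invented names, not the authors' implementation.

```python
# Structural sketch of the H-ReIL hierarchy. `mode_policies`, `high_level`, and
# `select_mode` are illustrative stand-ins for the paper's components.

class HReIL:
    def __init__(self, mode_policies, high_level):
        self.mode_policies = mode_policies  # e.g. {"cautious": pi_c, "fast": pi_f}, each IL-trained
        self.high_level = high_level        # RL policy over the discrete mode set

    def act(self, obs):
        mode = self.high_level.select_mode(obs)   # coarse, low-frequency decision
        return self.mode_policies[mode].act(obs)  # fine-grained imitative control
```

Switching at the mode level lets the system model the rapid phase transitions that a single flat policy struggles with.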
arXiv:2003.08003 (https://arxiv.org/abs/2003.08003) [pdf, other] cs.RO cs.AI cs.LG
CARPAL: Confidence-Aware Intent Recognition for Parallel Autonomy
Authors: Xin Huang, Stephen G. McGill, Jonathan A. DeCastro, Luke Fletcher, John J. Leonard, Brian C. Williams, Guy Rosman
Abstract: Predicting driver intentions is a difficult and crucial task for advanced driver assistance systems. Traditional confidence measures on predictions often ignore the way predicted trajectories affect downstream decisions for safe driving. In this paper, we propose a novel multi-task intent recognition neural network that predicts not only probabilistic driver trajectories, but also the utility statistics associated with the predictions for a given downstream task. We establish a decision criterion for parallel autonomy that takes into account the role of driver trajectory prediction in real-time decision making by reasoning about the estimated task-specific utility statistics. We further improve the robustness of our system by considering uncertainties in downstream planning tasks that may lead to unsafe decisions. We test our online system on a realistic urban driving dataset, demonstrate its advantage in terms of recall and fall-out metrics compared to baseline methods, and demonstrate its effectiveness in intervention and warning use cases.
Submitted 17 March, 2021; v1 submitted 17 March, 2020; originally announced March 2020.
Comments: Accepted at ICRA'21/RA-L'21. Author version with 9 pages, 5 figures, 2 algorithms
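A decision criterion that reasons about predicted task utility, in the spirit described, can be as simple as the rule below: intervene when the expected utility is poor and the estimate is certain enough to act on. The thresholds and variable names are hypothetical.

```python
# Illustrative task-aware decision rule; thresholds and names are assumptions.

def should_intervene(pred_utility_mean, pred_utility_var,
                     utility_floor=0.2, max_var=0.05):
    """Intervene if the expected downstream-task utility is low and its
    uncertainty is small enough for the estimate to be actionable."""
    return pred_utility_mean < utility_floor and pred_utility_var < max_var
```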
arXiv:1912.06785 (https://arxiv.org/abs/1912.06785) [pdf, other] cs.RO cs.CV cs.LG
DOI: 10.1109/LRA.2020.3004800 (https://doi.org/10.1109/LRA.2020.3004800)
Deep Context Maps: Agent Trajectory Prediction using Location-specific Latent Maps
Authors: Igor Gilitschenski, Guy Rosman, Arjun Gupta, Sertac Karaman, Daniela Rus
Abstract: In this paper, we propose a novel approach for agent motion prediction in cluttered environments. One of the main challenges in predicting agent motion is accounting for location- and context-specific information. Our main contribution is the concept of learning context maps to improve the prediction task. Context maps are a set of location-specific latent maps that are trained alongside the predictor. The proposed maps are thus capable of capturing location context beyond visual context cues (e.g., usual average speeds and typical trajectories) or predefined map primitives (such as lanes and stop lines). We pose context-map learning as a multi-task training problem and describe our map model and its incorporation into a state-of-the-art trajectory predictor. In extensive experiments, we show that the use of learned maps can significantly improve predictor accuracy. Furthermore, performance can be boosted further by providing partial knowledge of map semantics.
Submitted 19 June, 2020; v1 submitted 14 December, 2019; originally announced December 2019.
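The core object here, a location-specific latent map queried at agent positions and trained jointly with the predictor, can be sketched as follows; the map size, channel count, and bilinear lookup are our assumptions.

```python
import torch
import torch.nn as nn

class ContextMap(nn.Module):
    """Sketch of a location-specific latent map: a learned grid of feature
    vectors, bilinearly sampled at an agent's position and fed to the trajectory
    predictor. Trained end-to-end, so the channels need no predefined meaning."""
    def __init__(self, channels=8, H=128, W=128):
        super().__init__()
        # one such map per mapped location; starts flat and is learned from data
        self.map = nn.Parameter(torch.zeros(1, channels, H, W))

    def forward(self, xy_norm):
        # xy_norm: (B, 2) agent positions normalized to [-1, 1] map coordinates
        grid = xy_norm.view(1, -1, 1, 2)
        feat = nn.functional.grid_sample(self.map, grid, align_corners=False)
        return feat.squeeze(0).squeeze(-1).transpose(0, 1)  # (B, channels)
```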
arXiv:1911.12736 (https://arxiv.org/abs/1911.12736) [pdf, other] cs.RO cs.AI cs.LG
DiversityGAN: Diversity-Aware Vehicle Motion Prediction via Latent Semantic Sampling
Authors: Xin Huang, Stephen G. McGill, Jonathan A. DeCastro, Luke Fletcher, John J. Leonard, Brian C. Williams, Guy Rosman
Abstract: Vehicle trajectory prediction is crucial for autonomous driving and advanced driver assistance systems. While existing approaches may sample from a predicted distribution of vehicle trajectories, they lack the ability to explore it -- a key ability for evaluating safety from a planning and verification perspective. In this work, we devise a novel approach for generating realistic and diverse vehicle trajectories. We extend the generative adversarial network (GAN) framework with a low-dimensional approximate semantic space, and shape that space to capture semantics such as merging and turning. We sample from this space in a way that mimics the predicted distribution but lets us control the coverage of semantically distinct outcomes. We validate our approach on a publicly available dataset and show results that achieve state-of-the-art prediction performance while providing improved coverage of the space of predicted trajectory semantics.
Submitted 21 March, 2020; v1 submitted 28 November, 2019; originally announced November 2019.
Comments: 8 pages, 5 figures, 1 table
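Controlling coverage of semantically distinct outcomes suggests a farthest-point-style selection in the learned semantic space; the sketch below is that stand-in, not the paper's actual sampling procedure.

```python
import numpy as np

def coverage_sample(semantic_points, k):
    """Sketch of coverage-controlled sampling: given candidate trajectories already
    embedded in a low-dimensional semantic space (rows of `semantic_points`),
    greedily pick k samples that are far apart, so semantically distinct outcomes
    (e.g. merge vs. turn) are all represented. Assumes k <= len(semantic_points)."""
    chosen = [0]
    d = np.linalg.norm(semantic_points - semantic_points[0], axis=1)
    for _ in range(k - 1):
        nxt = int(np.argmax(d))  # candidate farthest from everything chosen so far
        chosen.append(nxt)
        d = np.minimum(d, np.linalg.norm(semantic_points - semantic_points[nxt], axis=1))
    return chosen  # indices of semantically diverse trajectory samples
```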
arXiv:1901.05105 (https://arxiv.org/abs/1901.05105) [pdf, other] cs.RO cs.AI cs.CV cs.LG
Uncertainty-Aware Driver Trajectory Prediction at Urban Intersections
Authors: Xin Huang, Stephen McGill, Brian C. Williams, Luke Fletcher, Guy Rosman
Abstract: Predicting the motion of a driver's vehicle is crucial for advanced driving systems, enabling detection of potential risks towards shared control between the driver and automation systems. In this paper, we propose a variational neural network approach that predicts future driver trajectory distributions for the vehicle based on multiple sensors. Our predictor generates both a conditional variational distribution of future trajectories and a confidence estimate for different time horizons. Our approach allows us to handle inherently uncertain situations, reason about the information gain from each input, and combine our model with additional predictors to create a mixture of experts. We show how to augment the variational predictor with a physics-based predictor and, based on their confidence estimates, improve overall system performance. The resulting combined model is aware of the uncertainty associated with its predictions, which helps the vehicle autonomy make decisions with more confidence. The model is validated on real-world urban driving data collected in multiple locations. This validation demonstrates that our approach improves the prediction error of a physics-based model by 25% while successfully identifying the uncertain cases with 82% accuracy.
Submitted 5 March, 2019; v1 submitted 15 January, 2019; originally announced January 2019.
Comments: Accepted at ICRA'19. 8 pages, 9 figures, 1 table. Video at https://youtu.be/clR08hRdtlM
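The confidence-gated combination with a physics-based predictor admits a compact sketch; the array shapes and the gating rule below are our assumptions, not the paper's exact formulation.

```python
import numpy as np

def mixture_prediction(traj_nn, conf_nn, traj_physics, conf_physics):
    """Sketch of a confidence-gated mixture of experts: weight the learned
    predictor against a physics-based one by their per-horizon confidences.
    Inputs are hypothetical (T, 2) trajectories and (T,) confidence arrays."""
    w = conf_nn / (conf_nn + conf_physics)  # (T,) weight on the learned model
    return w[:, None] * traj_nn + (1 - w[:, None]) * traj_physics
```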
arXiv:1811.10119 (https://arxiv.org/abs/1811.10119) [pdf, other] cs.LG stat.ML
DOI: 10.1109/ICRA.2019.8793579 (https://doi.org/10.1109/ICRA.2019.8793579)
Variational End-to-End Navigation and Localization
Authors: Alexander Amini, Guy Rosman, Sertac Karaman, Daniela Rus
Abstract: Deep learning has revolutionized the ability to learn "end-to-end" autonomous vehicle control directly from raw sensory data. While there have been recent extensions to handle forms of navigation instruction, these works are unable to capture the full distribution of possible actions that could be taken, or to reason about the localization of the robot within the environment. In this paper, we extend end-to-end driving networks with the ability to perform point-to-point navigation as well as probabilistic localization using only noisy GPS data. We define a novel variational network capable of learning from raw camera data of the environment as well as higher-level roadmaps to predict (1) a full probability distribution over the possible control commands and (2) a deterministic control command capable of navigating on the route specified within the map. Additionally, we formulate how our model can be used to localize the robot according to correspondences between the map and the observed visual road topology, inspired by the rough localization that human drivers can perform.
We test our algorithms on real-world driving data that the vehicle has never driven through before, and integrate our point-to-point navigation algorithms onboard a full-scale autonomous vehicle for real-time performance. Our localization algorithm is also evaluated over a new set of roads and intersections to demonstrate rough pose localization even in situations without any GPS prior.
Submitted 11 June, 2019; v1 submitted 25 November, 2018; originally announced November 2018.
Comments: Published in IEEE International Conference on Robotics and Automation (ICRA) 2019. Best Paper Award Finalist
Journal ref: 2019 International Conference on Robotics and Automation (ICRA)
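A "full probability distribution over the possible control commands" is often realized as a Gaussian mixture over, e.g., steering angle. The sketch below shows such a mixture log-likelihood; the shapes and parameterization are our assumptions rather than the paper's network head.

```python
import math
import torch

def control_log_prob(steer, means, sigmas, logits):
    """Sketch: log-likelihood of steering commands under a K-component Gaussian
    mixture. Assumed shapes: steer (B,); means, sigmas, logits each (B, K)."""
    log_w = torch.log_softmax(logits, dim=-1)  # mixture weights
    comp = (-0.5 * ((steer[:, None] - means) / sigmas) ** 2
            - torch.log(sigmas) - 0.5 * math.log(2 * math.pi))
    return torch.logsumexp(log_w + comp, dim=-1)  # (B,)
```

Training maximizes this log-likelihood on expert commands, so multimodal choices (e.g. turn left or right at a fork) are preserved instead of being averaged away.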
arXiv:1709.01077 (https://arxiv.org/abs/1709.01077) [pdf, other] cs.CV
A Nonparametric Model for Multimodal Collaborative Activities Summarization
Authors: Guy Rosman, John W. Fisher III, Daniela Rus
Abstract: Ego-centric data streams provide a unique opportunity to reason about joint behavior by pooling data across individuals. This is especially evident in urban environments teeming with human activities, but which suffer from incomplete and noisy data. Collaborative human activities exhibit common spatial, temporal, and visual characteristics, facilitating inference across individuals from multiple sensory modalities, as we explore in this paper from the perspective of meetings. We propose a new Bayesian nonparametric model that enables us to efficiently pool video and GPS data towards collaborative activity analysis from multiple individuals. We demonstrate the utility of this model for inference tasks such as activity detection, classification, and summarization. We further demonstrate how the spatio-temporal structure embedded in our model enables a better understanding of partial and noisy observations, such as localization and face detections, based on social interactions. We show results on both synthetic experiments and a new dataset of egocentric video and noisy GPS data from multiple individuals.
Submitted 4 September, 2017; originally announced September 2017.
arXiv:1511.08886 (https://arxiv.org/abs/1511.08886) [pdf, other] cs.CV
Real-Time Depth Refinement for Specular Objects
Authors: Roy Or-El, Rom Hershkovitz, Aaron Wetzler, Guy Rosman, Alfred M. Bruckstein, Ron Kimmel
Abstract: The introduction of consumer RGB-D scanners set off a major boost in 3D computer vision research. Yet, the precision of existing depth scanners is not sufficient to recover the fine details of a scanned object. While modern shading-based depth refinement methods have been proven to work well with Lambertian objects, they break down in the presence of specularities. We present a novel shape-from-shading framework that addresses this issue and enhances the depth profiles of both diffuse and specular objects. We take advantage of the built-in monochromatic IR projector and IR images of RGB-D scanners, and present a lighting model that accounts for the specular regions in the input image. Using this model, we reconstruct the depth map in real time. Both quantitative tests and visual evaluations show that the proposed method produces state-of-the-art depth reconstruction results.
Submitted 30 March, 2016; v1 submitted 28 November, 2015; originally announced November 2015.
Comments: Camera-ready version for CVPR 2016
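The key ingredient in the abstract is a lighting model with an explicit specular term. A Phong-style sketch of such a model is below; the paper's IR-projector model differs in detail, and all constants here are illustrative. In shape-from-shading refinement, intensities rendered from candidate surface normals are compared against the observed IR image, and the depth map is adjusted until they agree.

```python
import numpy as np

def shading(normal, light_dir, view_dir, kd=0.8, ks=0.2, shininess=16):
    """Phong-style intensity at a surface point (illustrative stand-in for the
    paper's IR lighting model). `light_dir` and `view_dir` are assumed to be
    unit vectors pointing from the surface toward the light and the camera."""
    n = normal / np.linalg.norm(normal)
    diffuse = max(float(n @ light_dir), 0.0)               # Lambertian term
    reflect = 2 * (n @ light_dir) * n - light_dir          # mirrored light ray
    specular = max(float(reflect @ view_dir), 0.0) ** shininess
    return kd * diffuse + ks * specular
```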
href="https://info.arxiv.org/help/license/index.html">Copyright</a></li> <li><a href="https://info.arxiv.org/help/policies/privacy_policy.html">Privacy Policy</a></li> </ul> </div> <div class="column sorry-app-links"> <ul class="nav-spaced"> <li><a href="https://info.arxiv.org/help/web_accessibility.html">Web Accessibility Assistance</a></li> <li> <p class="help"> <a class="a11y-main-link" href="https://status.arxiv.org" target="_blank">arXiv Operational Status <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 256 512" class="icon filter-dark_grey" role="presentation"><path d="M224.3 273l-136 136c-9.4 9.4-24.6 9.4-33.9 0l-22.6-22.6c-9.4-9.4-9.4-24.6 0-33.9l96.4-96.4-96.4-96.4c-9.4-9.4-9.4-24.6 0-33.9L54.3 103c9.4-9.4 24.6-9.4 33.9 0l136 136c9.5 9.4 9.5 24.6.1 34z"/></svg></a><br> Get status notifications via <a class="is-link" href="https://subscribe.sorryapp.com/24846f03/email/new" target="_blank"><svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 512 512" class="icon filter-black" role="presentation"><path d="M502.3 190.8c3.9-3.1 9.7-.2 9.7 4.7V400c0 26.5-21.5 48-48 48H48c-26.5 0-48-21.5-48-48V195.6c0-5 5.7-7.8 9.7-4.7 22.4 17.4 52.1 39.5 154.1 113.6 21.1 15.4 56.7 47.8 92.2 47.6 35.7.3 72-32.8 92.3-47.6 102-74.1 131.6-96.3 154-113.7zM256 320c23.2.4 56.6-29.2 73.4-41.4 132.7-96.3 142.8-104.7 173.4-128.7 5.8-4.5 9.2-11.5 9.2-18.9v-19c0-26.5-21.5-48-48-48H48C21.5 64 0 85.5 0 112v19c0 7.4 3.4 14.3 9.2 18.9 30.6 23.9 40.7 32.4 173.4 128.7 16.8 12.2 50.2 41.8 73.4 41.4z"/></svg>email</a> or <a class="is-link" href="https://subscribe.sorryapp.com/24846f03/slack/new" target="_blank"><svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 448 512" class="icon filter-black" role="presentation"><path d="M94.12 315.1c0 25.9-21.16 47.06-47.06 47.06S0 341 0 315.1c0-25.9 21.16-47.06 47.06-47.06h47.06v47.06zm23.72 0c0-25.9 21.16-47.06 47.06-47.06s47.06 21.16 47.06 47.06v117.84c0 25.9-21.16 47.06-47.06 47.06s-47.06-21.16-47.06-47.06V315.1zm47.06-188.98c-25.9 0-47.06-21.16-47.06-47.06S139 32 164.9 32s47.06 21.16 47.06 47.06v47.06H164.9zm0 23.72c25.9 0 47.06 21.16 47.06 47.06s-21.16 47.06-47.06 47.06H47.06C21.16 243.96 0 222.8 0 196.9s21.16-47.06 47.06-47.06H164.9zm188.98 47.06c0-25.9 21.16-47.06 47.06-47.06 25.9 0 47.06 21.16 47.06 47.06s-21.16 47.06-47.06 47.06h-47.06V196.9zm-23.72 0c0 25.9-21.16 47.06-47.06 47.06-25.9 0-47.06-21.16-47.06-47.06V79.06c0-25.9 21.16-47.06 47.06-47.06 25.9 0 47.06 21.16 47.06 47.06V196.9zM283.1 385.88c25.9 0 47.06 21.16 47.06 47.06 0 25.9-21.16 47.06-47.06 47.06-25.9 0-47.06-21.16-47.06-47.06v-47.06h47.06zm0-23.72c-25.9 0-47.06-21.16-47.06-47.06 0-25.9 21.16-47.06 47.06-47.06h117.84c25.9 0 47.06 21.16 47.06 47.06 0 25.9-21.16 47.06-47.06 47.06H283.1z"/></svg>slack</a> </p> </li> </ul> </div> </div> </div> <!-- end MetaColumn 2 --> </div> </footer> <script src="https://static.arxiv.org/static/base/1.0.0a5/js/member_acknowledgement.js"></script> </body> </html>