CINXE.COM

Search | arXiv e-print repository

<!DOCTYPE html> <html lang="en"> <head> <meta charset="utf-8"/> <meta name="viewport" content="width=device-width, initial-scale=1"/> <!-- new favicon config and versions by realfavicongenerator.net --> <link rel="apple-touch-icon" sizes="180x180" href="https://static.arxiv.org/static/base/1.0.0a5/images/icons/apple-touch-icon.png"> <link rel="icon" type="image/png" sizes="32x32" href="https://static.arxiv.org/static/base/1.0.0a5/images/icons/favicon-32x32.png"> <link rel="icon" type="image/png" sizes="16x16" href="https://static.arxiv.org/static/base/1.0.0a5/images/icons/favicon-16x16.png"> <link rel="manifest" href="https://static.arxiv.org/static/base/1.0.0a5/images/icons/site.webmanifest"> <link rel="mask-icon" href="https://static.arxiv.org/static/base/1.0.0a5/images/icons/safari-pinned-tab.svg" color="#b31b1b"> <link rel="shortcut icon" href="https://static.arxiv.org/static/base/1.0.0a5/images/icons/favicon.ico"> <meta name="msapplication-TileColor" content="#b31b1b"> <meta name="msapplication-config" content="images/icons/browserconfig.xml"> <meta name="theme-color" content="#b31b1b"> <!-- end favicon config --> <title>Search | arXiv e-print repository</title> <script defer src="https://static.arxiv.org/static/base/1.0.0a5/fontawesome-free-5.11.2-web/js/all.js"></script> <link rel="stylesheet" href="https://static.arxiv.org/static/base/1.0.0a5/css/arxivstyle.css" /> <script type="text/x-mathjax-config"> MathJax.Hub.Config({ messageStyle: "none", extensions: ["tex2jax.js"], jax: ["input/TeX", "output/HTML-CSS"], tex2jax: { inlineMath: [ ['$','$'], ["\\(","\\)"] ], displayMath: [ ['$$','$$'], ["\\[","\\]"] ], processEscapes: true, ignoreClass: '.*', processClass: 'mathjax.*' }, TeX: { extensions: ["AMSmath.js", "AMSsymbols.js", "noErrors.js"], noErrors: { inlineDelimiters: ["$","$"], multiLine: false, style: { "font-size": "normal", "border": "" } } }, "HTML-CSS": { availableFonts: ["TeX"] } }); </script> <script src='//static.arxiv.org/MathJax-2.7.3/MathJax.js'></script> <script src="https://static.arxiv.org/static/base/1.0.0a5/js/notification.js"></script> <link rel="stylesheet" href="https://static.arxiv.org/static/search/0.5.6/css/bulma-tooltip.min.css" /> <link rel="stylesheet" href="https://static.arxiv.org/static/search/0.5.6/css/search.css" /> <script src="https://code.jquery.com/jquery-3.2.1.slim.min.js" integrity="sha256-k2WSCIexGzOj3Euiig+TlR8gA0EmPjuc79OEeY5L45g=" crossorigin="anonymous"></script> <script src="https://static.arxiv.org/static/search/0.5.6/js/fieldset.js"></script> <style> radio#cf-customfield_11400 { display: none; } </style> </head> <body> <header><a href="#main-container" class="is-sr-only">Skip to main content</a> <!-- contains Cornell logo and sponsor statement --> <div class="attribution level is-marginless" role="banner"> <div class="level-left"> <a class="level-item" href="https://cornell.edu/"><img src="https://static.arxiv.org/static/base/1.0.0a5/images/cornell-reduced-white-SMALL.svg" alt="Cornell University" width="200" aria-label="logo" /></a> </div> <div class="level-right is-marginless"><p class="sponsors level-item is-marginless"><span id="support-ack-url">We gratefully acknowledge support from<br /> the Simons Foundation, <a href="https://info.arxiv.org/about/ourmembers.html">member institutions</a>, and all contributors. <a href="https://info.arxiv.org/about/donate.html">Donate</a></span></p></div> </div> <!-- contains arXiv identity and search bar --> <div class="identity level is-marginless"> <div class="level-left"> <div class="level-item"> <a class="arxiv" href="https://arxiv.org/" aria-label="arxiv-logo"> <img src="https://static.arxiv.org/static/base/1.0.0a5/images/arxiv-logo-one-color-white.svg" aria-label="logo" alt="arxiv logo" width="85" style="width:85px;"/> </a> </div> </div> <div class="search-block level-right"> <form class="level-item mini-search" method="GET" action="https://arxiv.org/search"> <div class="field has-addons"> <div class="control"> <input class="input is-small" type="text" name="query" placeholder="Search..." aria-label="Search term or terms" /> <p class="help"><a href="https://info.arxiv.org/help">Help</a> | <a href="https://arxiv.org/search/advanced">Advanced Search</a></p> </div> <div class="control"> <div class="select is-small"> <select name="searchtype" aria-label="Field to search"> <option value="all" selected="selected">All fields</option> <option value="title">Title</option> <option value="author">Author</option> <option value="abstract">Abstract</option> <option value="comments">Comments</option> <option value="journal_ref">Journal reference</option> <option value="acm_class">ACM classification</option> <option value="msc_class">MSC classification</option> <option value="report_num">Report number</option> <option value="paper_id">arXiv identifier</option> <option value="doi">DOI</option> <option value="orcid">ORCID</option> <option value="author_id">arXiv author ID</option> <option value="help">Help pages</option> <option value="full_text">Full text</option> </select> </div> </div> <input type="hidden" name="source" value="header"> <button class="button is-small is-cul-darker">Search</button> </div> </form> </div> </div> <!-- closes identity --> <div class="container"> <div class="user-tools is-size-7 has-text-right has-text-weight-bold" role="navigation" aria-label="User menu"> <a href="https://arxiv.org/login">Login</a> </div> </div> </header> <main class="container" id="main-container"> <div class="level is-marginless"> <div class="level-left"> <h1 class="title is-clearfix"> Showing 1&ndash;50 of 192 results for author: <span class="mathjax">Knoll, A</span> </h1> </div> <div class="level-right is-hidden-mobile"> <!-- feedback for mobile is moved to footer --> <span class="help" style="display: inline-block;"><a href="https://github.com/arXiv/arxiv-search/releases">Search v0.5.6 released 2020-02-24</a>&nbsp;&nbsp;</span> </div> </div> <div class="content"> <form method="GET" action="/search/cs" aria-role="search"> Searching in archive <strong>cs</strong>. <a href="/search/?searchtype=author&amp;query=Knoll%2C+A">Search in all archives.</a> <div class="field has-addons-tablet"> <div class="control is-expanded"> <label for="query" class="hidden-label">Search term or terms</label> <input class="input is-medium" id="query" name="query" placeholder="Search term..." type="text" value="Knoll, A"> </div> <div class="select control is-medium"> <label class="is-hidden" for="searchtype">Field</label> <select class="is-medium" id="searchtype" name="searchtype"><option value="all">All fields</option><option value="title">Title</option><option selected value="author">Author(s)</option><option value="abstract">Abstract</option><option value="comments">Comments</option><option value="journal_ref">Journal reference</option><option value="acm_class">ACM classification</option><option value="msc_class">MSC classification</option><option value="report_num">Report number</option><option value="paper_id">arXiv identifier</option><option value="doi">DOI</option><option value="orcid">ORCID</option><option value="license">License (URI)</option><option value="author_id">arXiv author ID</option><option value="help">Help pages</option><option value="full_text">Full text</option></select> </div> <div class="control"> <button class="button is-link is-medium">Search</button> </div> </div> <div class="field"> <div class="control is-size-7"> <label class="radio"> <input checked id="abstracts-0" name="abstracts" type="radio" value="show"> Show abstracts </label> <label class="radio"> <input id="abstracts-1" name="abstracts" type="radio" value="hide"> Hide abstracts </label> </div> </div> <div class="is-clearfix" style="height: 2.5em"> <div class="is-pulled-right"> <a href="/search/advanced?terms-0-term=Knoll%2C+A&amp;terms-0-field=author&amp;size=50&amp;order=-announced_date_first">Advanced Search</a> </div> </div> <input type="hidden" name="order" value="-announced_date_first"> <input type="hidden" name="size" value="50"> </form> <div class="level breathe-horizontal"> <div class="level-left"> <form method="GET" action="/search/"> <div style="display: none;"> <select id="searchtype" name="searchtype"><option value="all">All fields</option><option value="title">Title</option><option selected value="author">Author(s)</option><option value="abstract">Abstract</option><option value="comments">Comments</option><option value="journal_ref">Journal reference</option><option value="acm_class">ACM classification</option><option value="msc_class">MSC classification</option><option value="report_num">Report number</option><option value="paper_id">arXiv identifier</option><option value="doi">DOI</option><option value="orcid">ORCID</option><option value="license">License (URI)</option><option value="author_id">arXiv author ID</option><option value="help">Help pages</option><option value="full_text">Full text</option></select> <input id="query" name="query" type="text" value="Knoll, A"> <ul id="abstracts"><li><input checked id="abstracts-0" name="abstracts" type="radio" value="show"> <label for="abstracts-0">Show abstracts</label></li><li><input id="abstracts-1" name="abstracts" type="radio" value="hide"> <label for="abstracts-1">Hide abstracts</label></li></ul> </div> <div class="box field is-grouped is-grouped-multiline level-item"> <div class="control"> <span class="select is-small"> <select id="size" name="size"><option value="25">25</option><option selected value="50">50</option><option value="100">100</option><option value="200">200</option></select> </span> <label for="size">results per page</label>. </div> <div class="control"> <label for="order">Sort results by</label> <span class="select is-small"> <select id="order" name="order"><option selected value="-announced_date_first">Announcement date (newest first)</option><option value="announced_date_first">Announcement date (oldest first)</option><option value="-submitted_date">Submission date (newest first)</option><option value="submitted_date">Submission date (oldest first)</option><option value="">Relevance</option></select> </span> </div> <div class="control"> <button class="button is-small is-link">Go</button> </div> </div> </form> </div> </div> <nav class="pagination is-small is-centered breathe-horizontal" role="navigation" aria-label="pagination"> <a href="" class="pagination-previous is-invisible">Previous </a> <a href="/search/?searchtype=author&amp;query=Knoll%2C+A&amp;start=50" class="pagination-next" >Next </a> <ul class="pagination-list"> <li> <a href="/search/?searchtype=author&amp;query=Knoll%2C+A&amp;start=0" class="pagination-link is-current" aria-label="Goto page 1">1 </a> </li> <li> <a href="/search/?searchtype=author&amp;query=Knoll%2C+A&amp;start=50" class="pagination-link " aria-label="Page 2" aria-current="page">2 </a> </li> <li> <a href="/search/?searchtype=author&amp;query=Knoll%2C+A&amp;start=100" class="pagination-link " aria-label="Page 3" aria-current="page">3 </a> </li> <li> <a href="/search/?searchtype=author&amp;query=Knoll%2C+A&amp;start=150" class="pagination-link " aria-label="Page 4" aria-current="page">4 </a> </li> </ul> </nav> <ol class="breathe-horizontal" start="1"> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2502.02449">arXiv:2502.02449</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2502.02449">pdf</a>, <a href="https://arxiv.org/format/2502.02449">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> TUMTraffic-VideoQA: A Benchmark for Unified Spatio-Temporal Video Understanding in Traffic Scenes </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Zhou%2C+X">Xingcheng Zhou</a>, <a href="/search/cs?searchtype=author&amp;query=Larintzakis%2C+K">Konstantinos Larintzakis</a>, <a href="/search/cs?searchtype=author&amp;query=Guo%2C+H">Hao Guo</a>, <a href="/search/cs?searchtype=author&amp;query=Zimmer%2C+W">Walter Zimmer</a>, <a href="/search/cs?searchtype=author&amp;query=Liu%2C+M">Mingyu Liu</a>, <a href="/search/cs?searchtype=author&amp;query=Cao%2C+H">Hu Cao</a>, <a href="/search/cs?searchtype=author&amp;query=Zhang%2C+J">Jiajie Zhang</a>, <a href="/search/cs?searchtype=author&amp;query=Lakshminarasimhan%2C+V">Venkatnarayanan Lakshminarasimhan</a>, <a href="/search/cs?searchtype=author&amp;query=Strand%2C+L">Leah Strand</a>, <a href="/search/cs?searchtype=author&amp;query=Knoll%2C+A+C">Alois C. Knoll</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2502.02449v1-abstract-short" style="display: inline;"> We present TUMTraffic-VideoQA, a novel dataset and benchmark designed for spatio-temporal video understanding in complex roadside traffic scenarios. The dataset comprises 1,000 videos, featuring 85,000 multiple-choice QA pairs, 2,300 object captioning, and 5,700 object grounding annotations, encompassing diverse real-world conditions such as adverse weather and traffic anomalies. By incorporating&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.02449v1-abstract-full').style.display = 'inline'; document.getElementById('2502.02449v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2502.02449v1-abstract-full" style="display: none;"> We present TUMTraffic-VideoQA, a novel dataset and benchmark designed for spatio-temporal video understanding in complex roadside traffic scenarios. The dataset comprises 1,000 videos, featuring 85,000 multiple-choice QA pairs, 2,300 object captioning, and 5,700 object grounding annotations, encompassing diverse real-world conditions such as adverse weather and traffic anomalies. By incorporating tuple-based spatio-temporal object expressions, TUMTraffic-VideoQA unifies three essential tasks-multiple-choice video question answering, referred object captioning, and spatio-temporal object grounding-within a cohesive evaluation framework. We further introduce the TUMTraffic-Qwen baseline model, enhanced with visual token sampling strategies, providing valuable insights into the challenges of fine-grained spatio-temporal reasoning. Extensive experiments demonstrate the dataset&#39;s complexity, highlight the limitations of existing models, and position TUMTraffic-VideoQA as a robust foundation for advancing research in intelligent transportation systems. The dataset and benchmark are publicly available to facilitate further exploration. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.02449v1-abstract-full').style.display = 'none'; document.getElementById('2502.02449v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 4 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2025. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2502.00402">arXiv:2502.00402</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2502.00402">pdf</a>, <a href="https://arxiv.org/format/2502.00402">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> Enhancing Highway Safety: Accident Detection on the A9 Test Stretch Using Roadside Sensors </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Zimmer%2C+W">Walter Zimmer</a>, <a href="/search/cs?searchtype=author&amp;query=Greer%2C+R">Ross Greer</a>, <a href="/search/cs?searchtype=author&amp;query=Zhou%2C+X">Xingcheng Zhou</a>, <a href="/search/cs?searchtype=author&amp;query=Song%2C+R">Rui Song</a>, <a href="/search/cs?searchtype=author&amp;query=Pavel%2C+M">Marc Pavel</a>, <a href="/search/cs?searchtype=author&amp;query=Lehmberg%2C+D">Daniel Lehmberg</a>, <a href="/search/cs?searchtype=author&amp;query=Ghita%2C+A">Ahmed Ghita</a>, <a href="/search/cs?searchtype=author&amp;query=Gopalkrishnan%2C+A">Akshay Gopalkrishnan</a>, <a href="/search/cs?searchtype=author&amp;query=Trivedi%2C+M">Mohan Trivedi</a>, <a href="/search/cs?searchtype=author&amp;query=Knoll%2C+A">Alois Knoll</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2502.00402v1-abstract-short" style="display: inline;"> Road traffic injuries are the leading cause of death for people aged 5-29, resulting in about 1.19 million deaths each year. To reduce these fatalities, it is essential to address human errors like speeding, drunk driving, and distractions. Additionally, faster accident detection and quicker medical response can help save lives. We propose an accident detection framework that combines a rule-based&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.00402v1-abstract-full').style.display = 'inline'; document.getElementById('2502.00402v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2502.00402v1-abstract-full" style="display: none;"> Road traffic injuries are the leading cause of death for people aged 5-29, resulting in about 1.19 million deaths each year. To reduce these fatalities, it is essential to address human errors like speeding, drunk driving, and distractions. Additionally, faster accident detection and quicker medical response can help save lives. We propose an accident detection framework that combines a rule-based approach with a learning-based one. We introduce a dataset of real-world highway accidents featuring high-speed crash sequences. It includes 294,924 labeled 2D boxes, 93,012 labeled 3D boxes, and track IDs across 48,144 frames captured at 10 Hz using four roadside cameras and LiDAR sensors. The dataset covers ten object classes and is released in the OpenLABEL format. Our experiments and analysis demonstrate the reliability of our method. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.00402v1-abstract-full').style.display = 'none'; document.getElementById('2502.00402v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 1 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2025. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2501.08083">arXiv:2501.08083</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2501.08083">pdf</a>, <a href="https://arxiv.org/format/2501.08083">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> Benchmarking Vision Foundation Models for Input Monitoring in Autonomous Driving </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Keser%2C+M">Mert Keser</a>, <a href="/search/cs?searchtype=author&amp;query=Orhan%2C+H+I">Halil Ibrahim Orhan</a>, <a href="/search/cs?searchtype=author&amp;query=Amini-Naieni%2C+N">Niki Amini-Naieni</a>, <a href="/search/cs?searchtype=author&amp;query=Schwalbe%2C+G">Gesina Schwalbe</a>, <a href="/search/cs?searchtype=author&amp;query=Knoll%2C+A">Alois Knoll</a>, <a href="/search/cs?searchtype=author&amp;query=Rottmann%2C+M">Matthias Rottmann</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2501.08083v2-abstract-short" style="display: inline;"> Deep neural networks (DNNs) remain challenged by distribution shifts in complex open-world domains like automated driving (AD): Absolute robustness against yet unknown novel objects (semantic shift) or styles like lighting conditions (covariate shift) cannot be guaranteed. Hence, reliable operation-time monitors for identification of out-of-training-data-distribution (OOD) scenarios are imperative&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2501.08083v2-abstract-full').style.display = 'inline'; document.getElementById('2501.08083v2-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2501.08083v2-abstract-full" style="display: none;"> Deep neural networks (DNNs) remain challenged by distribution shifts in complex open-world domains like automated driving (AD): Absolute robustness against yet unknown novel objects (semantic shift) or styles like lighting conditions (covariate shift) cannot be guaranteed. Hence, reliable operation-time monitors for identification of out-of-training-data-distribution (OOD) scenarios are imperative. Current approaches for OOD classification are untested for complex domains like AD, are limited in the kinds of shifts they detect, or even require supervision with OOD samples. To prepare for unanticipated shifts, we instead establish a framework around a principled, unsupervised, and model-agnostic method that unifies detection of all kinds of shifts: Find a full model of the training data&#39;s feature distribution, to then use its density at new points as in-distribution (ID) score. To implement this, we propose to combine the newly available Vision Foundation Models (VFM) as feature extractors with one of four alternative density modeling techniques. In an extensive benchmark of 4 VFMs against 20 baselines, we show the superior performance of VFM feature encodings compared to shift-specific OOD monitors. Additionally, we find that sophisticated architectures outperform larger latent space dimensionality; and our method identifies samples with higher risk of errors on downstream tasks, despite being model-agnostic. This suggests that VFMs are promising to realize model-agnostic, unsupervised, reliable safety monitors in complex vision tasks. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2501.08083v2-abstract-full').style.display = 'none'; document.getElementById('2501.08083v2-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 27 January, 2025; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 14 January, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> January 2025. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2412.19067">arXiv:2412.19067</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2412.19067">pdf</a>, <a href="https://arxiv.org/format/2412.19067">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Robotics">cs.RO</span> </div> </div> <p class="title is-5 mathjax"> Learning Monocular Depth from Events via Egomotion Compensation </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Meng%2C+H">Haitao Meng</a>, <a href="/search/cs?searchtype=author&amp;query=Zhong%2C+C">Chonghao Zhong</a>, <a href="/search/cs?searchtype=author&amp;query=Tang%2C+S">Sheng Tang</a>, <a href="/search/cs?searchtype=author&amp;query=JunJia%2C+L">Lian JunJia</a>, <a href="/search/cs?searchtype=author&amp;query=Lin%2C+W">Wenwei Lin</a>, <a href="/search/cs?searchtype=author&amp;query=Bing%2C+Z">Zhenshan Bing</a>, <a href="/search/cs?searchtype=author&amp;query=Chang%2C+Y">Yi Chang</a>, <a href="/search/cs?searchtype=author&amp;query=Chen%2C+G">Gang Chen</a>, <a href="/search/cs?searchtype=author&amp;query=Knoll%2C+A">Alois Knoll</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2412.19067v1-abstract-short" style="display: inline;"> Event cameras are neuromorphically inspired sensors that sparsely and asynchronously report brightness changes. Their unique characteristics of high temporal resolution, high dynamic range, and low power consumption make them well-suited for addressing challenges in monocular depth estimation (e.g., high-speed or low-lighting conditions). However, current existing methods primarily treat event str&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2412.19067v1-abstract-full').style.display = 'inline'; document.getElementById('2412.19067v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2412.19067v1-abstract-full" style="display: none;"> Event cameras are neuromorphically inspired sensors that sparsely and asynchronously report brightness changes. Their unique characteristics of high temporal resolution, high dynamic range, and low power consumption make them well-suited for addressing challenges in monocular depth estimation (e.g., high-speed or low-lighting conditions). However, current existing methods primarily treat event streams as black-box learning systems without incorporating prior physical principles, thus becoming over-parameterized and failing to fully exploit the rich temporal information inherent in event camera data. To address this limitation, we incorporate physical motion principles to propose an interpretable monocular depth estimation framework, where the likelihood of various depth hypotheses is explicitly determined by the effect of motion compensation. To achieve this, we propose a Focus Cost Discrimination (FCD) module that measures the clarity of edges as an essential indicator of focus level and integrates spatial surroundings to facilitate cost estimation. Furthermore, we analyze the noise patterns within our framework and improve it with the newly introduced Inter-Hypotheses Cost Aggregation (IHCA) module, where the cost volume is refined through cost trend prediction and multi-scale cost consistency constraints. Extensive experiments on real-world and synthetic datasets demonstrate that our proposed framework outperforms cutting-edge methods by up to 10\% in terms of the absolute relative error metric, revealing superior performance in predicting accuracy. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2412.19067v1-abstract-full').style.display = 'none'; document.getElementById('2412.19067v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 26 December, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> December 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">9 pages, 3 figures</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2412.09995">arXiv:2412.09995</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2412.09995">pdf</a>, <a href="https://arxiv.org/format/2412.09995">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Robotics">cs.RO</span> </div> </div> <p class="title is-5 mathjax"> Virtualization &amp; Microservice Architecture for Software-Defined Vehicles: An Evaluation and Exploration </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Wen%2C+L">Long Wen</a>, <a href="/search/cs?searchtype=author&amp;query=Rickert%2C+M">Markus Rickert</a>, <a href="/search/cs?searchtype=author&amp;query=Pan%2C+F">Fengjunjie Pan</a>, <a href="/search/cs?searchtype=author&amp;query=Lin%2C+J">Jianjie Lin</a>, <a href="/search/cs?searchtype=author&amp;query=Zhang%2C+Y">Yu Zhang</a>, <a href="/search/cs?searchtype=author&amp;query=Betz%2C+T">Tobias Betz</a>, <a href="/search/cs?searchtype=author&amp;query=Knoll%2C+A">Alois Knoll</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2412.09995v1-abstract-short" style="display: inline;"> The emergence of Software-Defined Vehicles (SDVs) signifies a shift from a distributed network of electronic control units (ECUs) to a centralized computing architecture within the vehicle&#39;s electrical and electronic systems. This transition addresses the growing complexity and demand for enhanced functionality in traditional E/E architectures, with containerization and virtualization streamlining&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2412.09995v1-abstract-full').style.display = 'inline'; document.getElementById('2412.09995v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2412.09995v1-abstract-full" style="display: none;"> The emergence of Software-Defined Vehicles (SDVs) signifies a shift from a distributed network of electronic control units (ECUs) to a centralized computing architecture within the vehicle&#39;s electrical and electronic systems. This transition addresses the growing complexity and demand for enhanced functionality in traditional E/E architectures, with containerization and virtualization streamlining software development and updates within the SDV framework. While widely used in cloud computing, their performance and suitability for intelligent vehicles have yet to be thoroughly evaluated. In this work, we conduct a comprehensive performance evaluation of containerization and virtualization on embedded and high-performance AMD64 and ARM64 systems, focusing on CPU, memory, network, and disk metrics. In addition, we assess their impact on real-world automotive applications using the Autoware framework and further integrate a microservice-based architecture to evaluate its start-up time and resource consumption. Our extensive experiments reveal a slight 0-5% performance decline in CPU, memory, and network usage for both containerization and virtualization compared to bare-metal setups, with more significant reductions in disk operations-5-15% for containerized environments and up to 35% for virtualized setups. Despite these declines, experiments with actual vehicle applications demonstrate minimal impact on the Autoware framework, and in some cases, a microservice architecture integration improves start-up time by up to 18%. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2412.09995v1-abstract-full').style.display = 'none'; document.getElementById('2412.09995v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 13 December, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> December 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">15 pages, 15 figures</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2411.15476">arXiv:2411.15476</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2411.15476">pdf</a>, <a href="https://arxiv.org/format/2411.15476">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Robotics">cs.RO</span> </div> </div> <p class="title is-5 mathjax"> Gassidy: Gaussian Splatting SLAM in Dynamic Environments </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Wen%2C+L">Long Wen</a>, <a href="/search/cs?searchtype=author&amp;query=Li%2C+S">Shixin Li</a>, <a href="/search/cs?searchtype=author&amp;query=Zhang%2C+Y">Yu Zhang</a>, <a href="/search/cs?searchtype=author&amp;query=Huang%2C+Y">Yuhong Huang</a>, <a href="/search/cs?searchtype=author&amp;query=Lin%2C+J">Jianjie Lin</a>, <a href="/search/cs?searchtype=author&amp;query=Pan%2C+F">Fengjunjie Pan</a>, <a href="/search/cs?searchtype=author&amp;query=Bing%2C+Z">Zhenshan Bing</a>, <a href="/search/cs?searchtype=author&amp;query=Knoll%2C+A">Alois Knoll</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2411.15476v1-abstract-short" style="display: inline;"> 3D Gaussian Splatting (3DGS) allows flexible adjustments to scene representation, enabling continuous optimization of scene quality during dense visual simultaneous localization and mapping (SLAM) in static environments. However, 3DGS faces challenges in handling environmental disturbances from dynamic objects with irregular movement, leading to degradation in both camera tracking accuracy and map&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.15476v1-abstract-full').style.display = 'inline'; document.getElementById('2411.15476v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2411.15476v1-abstract-full" style="display: none;"> 3D Gaussian Splatting (3DGS) allows flexible adjustments to scene representation, enabling continuous optimization of scene quality during dense visual simultaneous localization and mapping (SLAM) in static environments. However, 3DGS faces challenges in handling environmental disturbances from dynamic objects with irregular movement, leading to degradation in both camera tracking accuracy and map reconstruction quality. To address this challenge, we develop an RGB-D dense SLAM which is called Gaussian Splatting SLAM in Dynamic Environments (Gassidy). This approach calculates Gaussians to generate rendering loss flows for each environmental component based on a designed photometric-geometric loss function. To distinguish and filter environmental disturbances, we iteratively analyze rendering loss flows to detect features characterized by changes in loss values between dynamic objects and static components. This process ensures a clean environment for accurate scene reconstruction. Compared to state-of-the-art SLAM methods, experimental results on open datasets show that Gassidy improves camera tracking precision by up to 97.9% and enhances map quality by up to 6%. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.15476v1-abstract-full').style.display = 'none'; document.getElementById('2411.15476v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 23 November, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> November 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">This paper is currently under reviewed for ICRA 2025</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2411.09590">arXiv:2411.09590</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2411.09590">pdf</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Software Engineering">cs.SE</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> </div> </div> <p class="title is-5 mathjax"> Adopting RAG for LLM-Aided Future Vehicle Design </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Zolfaghari%2C+V">Vahid Zolfaghari</a>, <a href="/search/cs?searchtype=author&amp;query=Petrovic%2C+N">Nenad Petrovic</a>, <a href="/search/cs?searchtype=author&amp;query=Pan%2C+F">Fengjunjie Pan</a>, <a href="/search/cs?searchtype=author&amp;query=Lebioda%2C+K">Krzysztof Lebioda</a>, <a href="/search/cs?searchtype=author&amp;query=Knoll%2C+A">Alois Knoll</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2411.09590v1-abstract-short" style="display: inline;"> In this paper, we explore the integration of Large Language Models (LLMs) with Retrieval-Augmented Generation (RAG) to enhance automated design and software development in the automotive industry. We present two case studies: a standardization compliance chatbot and a design copilot, both utilizing RAG to provide accurate, context-aware responses. We evaluate four LLMs-GPT-4o, LLAMA3, Mistral, and&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.09590v1-abstract-full').style.display = 'inline'; document.getElementById('2411.09590v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2411.09590v1-abstract-full" style="display: none;"> In this paper, we explore the integration of Large Language Models (LLMs) with Retrieval-Augmented Generation (RAG) to enhance automated design and software development in the automotive industry. We present two case studies: a standardization compliance chatbot and a design copilot, both utilizing RAG to provide accurate, context-aware responses. We evaluate four LLMs-GPT-4o, LLAMA3, Mistral, and Mixtral -- comparing their answering accuracy and execution time. Our results demonstrate that while GPT-4 offers superior performance, LLAMA3 and Mistral also show promising capabilities for local deployment, addressing data privacy concerns in automotive applications. This study highlights the potential of RAG-augmented LLMs in improving design workflows and compliance in automotive engineering. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.09590v1-abstract-full').style.display = 'none'; document.getElementById('2411.09590v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 14 November, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> November 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Conference paper accepted in IEEE FLLM 2024</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2411.08060">arXiv:2411.08060</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2411.08060">pdf</a>, <a href="https://arxiv.org/format/2411.08060">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Robotics">cs.RO</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> Online Collision Risk Estimation via Monocular Depth-Aware Object Detectors and Fuzzy Inference </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Liao%2C+B+H">Brian Hsuan-Cheng Liao</a>, <a href="/search/cs?searchtype=author&amp;query=Xu%2C+Y">Yingjie Xu</a>, <a href="/search/cs?searchtype=author&amp;query=Cheng%2C+C">Chih-Hong Cheng</a>, <a href="/search/cs?searchtype=author&amp;query=Esen%2C+H">Hasan Esen</a>, <a href="/search/cs?searchtype=author&amp;query=Knoll%2C+A">Alois Knoll</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2411.08060v1-abstract-short" style="display: inline;"> This paper presents a monitoring framework that infers the level of autonomous vehicle (AV) collision risk based on its object detector&#39;s performance using only monocular camera images. Essentially, the framework takes two sets of predictions produced by different algorithms and associates their inconsistencies with the collision risk via fuzzy inference. The first set of predictions is obtained t&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.08060v1-abstract-full').style.display = 'inline'; document.getElementById('2411.08060v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2411.08060v1-abstract-full" style="display: none;"> This paper presents a monitoring framework that infers the level of autonomous vehicle (AV) collision risk based on its object detector&#39;s performance using only monocular camera images. Essentially, the framework takes two sets of predictions produced by different algorithms and associates their inconsistencies with the collision risk via fuzzy inference. The first set of predictions is obtained through retrieving safety-critical 2.5D objects from a depth map, and the second set comes from the AV&#39;s 3D object detector. We experimentally validate that, based on Intersection-over-Union (IoU) and a depth discrepancy measure, the inconsistencies between the two sets of predictions strongly correlate to the safety-related error of the 3D object detector against ground truths. This correlation allows us to construct a fuzzy inference system and map the inconsistency measures to an existing collision risk indicator. In particular, we apply various knowledge- and data-driven techniques and find using particle swarm optimization that learns general fuzzy rules gives the best mapping result. Lastly, we validate our monitor&#39;s capability to produce relevant risk estimates with the large-scale nuScenes dataset and show it can safeguard an AV in closed-loop simulations. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.08060v1-abstract-full').style.display = 'none'; document.getElementById('2411.08060v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 9 November, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> November 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">7 pages (IEEE double column format), 5 figures, 3 tables, submitted to ICRA 2025</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2410.19414">arXiv:2410.19414</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2410.19414">pdf</a>, <a href="https://arxiv.org/format/2410.19414">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Robotics">cs.RO</span> </div> </div> <p class="title is-5 mathjax"> Motion Planning for Robotics: A Review for Sampling-based Planners </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Zhang%2C+L">Liding Zhang</a>, <a href="/search/cs?searchtype=author&amp;query=Cai%2C+K">Kuanqi Cai</a>, <a href="/search/cs?searchtype=author&amp;query=Sun%2C+Z">Zewei Sun</a>, <a href="/search/cs?searchtype=author&amp;query=Bing%2C+Z">Zhenshan Bing</a>, <a href="/search/cs?searchtype=author&amp;query=Wang%2C+C">Chaoqun Wang</a>, <a href="/search/cs?searchtype=author&amp;query=Figueredo%2C+L">Luis Figueredo</a>, <a href="/search/cs?searchtype=author&amp;query=Haddadin%2C+S">Sami Haddadin</a>, <a href="/search/cs?searchtype=author&amp;query=Knoll%2C+A">Alois Knoll</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2410.19414v2-abstract-short" style="display: inline;"> Recent advancements in robotics have transformed industries such as manufacturing, logistics, surgery, and planetary exploration. A key challenge is developing efficient motion planning algorithms that allow robots to navigate complex environments while avoiding collisions and optimizing metrics like path length, sweep area, execution time, and energy consumption. Among the available algorithms, s&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.19414v2-abstract-full').style.display = 'inline'; document.getElementById('2410.19414v2-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2410.19414v2-abstract-full" style="display: none;"> Recent advancements in robotics have transformed industries such as manufacturing, logistics, surgery, and planetary exploration. A key challenge is developing efficient motion planning algorithms that allow robots to navigate complex environments while avoiding collisions and optimizing metrics like path length, sweep area, execution time, and energy consumption. Among the available algorithms, sampling-based methods have gained the most traction in both research and industry due to their ability to handle complex environments, explore free space, and offer probabilistic completeness along with other formal guarantees. Despite their widespread application, significant challenges still remain. To advance future planning algorithms, it is essential to review the current state-of-the-art solutions and their limitations. In this context, this work aims to shed light on these challenges and assess the development and applicability of sampling-based methods. Furthermore, we aim to provide an in-depth analysis of the design and evaluation of ten of the most popular planners across various scenarios. Our findings highlight the strides made in sampling-based methods while underscoring persistent challenges. This work offers an overview of the important ongoing research in robotic motion planning. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.19414v2-abstract-full').style.display = 'none'; document.getElementById('2410.19414v2-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 28 October, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 25 October, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">20 pages, 11 figures</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2409.17109">arXiv:2409.17109</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2409.17109">pdf</a>, <a href="https://arxiv.org/format/2409.17109">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> </div> </div> <p class="title is-5 mathjax"> Unveiling Ontological Commitment in Multi-Modal Foundation Models </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Keser%2C+M">Mert Keser</a>, <a href="/search/cs?searchtype=author&amp;query=Schwalbe%2C+G">Gesina Schwalbe</a>, <a href="/search/cs?searchtype=author&amp;query=Amini-Naieni%2C+N">Niki Amini-Naieni</a>, <a href="/search/cs?searchtype=author&amp;query=Rottmann%2C+M">Matthias Rottmann</a>, <a href="/search/cs?searchtype=author&amp;query=Knoll%2C+A">Alois Knoll</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2409.17109v1-abstract-short" style="display: inline;"> Ontological commitment, i.e., used concepts, relations, and assumptions, are a corner stone of qualitative reasoning (QR) models. The state-of-the-art for processing raw inputs, though, are deep neural networks (DNNs), nowadays often based off from multimodal foundation models. These automatically learn rich representations of concepts and respective reasoning. Unfortunately, the learned qualitati&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2409.17109v1-abstract-full').style.display = 'inline'; document.getElementById('2409.17109v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2409.17109v1-abstract-full" style="display: none;"> Ontological commitment, i.e., used concepts, relations, and assumptions, are a corner stone of qualitative reasoning (QR) models. The state-of-the-art for processing raw inputs, though, are deep neural networks (DNNs), nowadays often based off from multimodal foundation models. These automatically learn rich representations of concepts and respective reasoning. Unfortunately, the learned qualitative knowledge is opaque, preventing easy inspection, validation, or adaptation against available QR models. So far, it is possible to associate pre-defined concepts with latent representations of DNNs, but extractable relations are mostly limited to semantic similarity. As a next step towards QR for validation and verification of DNNs: Concretely, we propose a method that extracts the learned superclass hierarchy from a multimodal DNN for a given set of leaf concepts. Under the hood we (1) obtain leaf concept embeddings using the DNN&#39;s textual input modality; (2) apply hierarchical clustering to them, using that DNNs encode semantic similarities via vector distances; and (3) label the such-obtained parent concepts using search in available ontologies from QR. An initial evaluation study shows that meaningful ontological class hierarchies can be extracted from state-of-the-art foundation models. Furthermore, we demonstrate how to validate and verify a DNN&#39;s learned representations against given ontologies. Lastly, we discuss potential future applications in the context of QR. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2409.17109v1-abstract-full').style.display = 'none'; document.getElementById('2409.17109v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 25 September, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> September 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Qualitative Reasoning Workshop 2024 (QR2024) colocated with ECAI2024, camera-ready submission; first two authors contributed equally; 10 pages, 4 figures, 3 tables</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2409.11863">arXiv:2409.11863</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2409.11863">pdf</a>, <a href="https://arxiv.org/format/2409.11863">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Robotics">cs.RO</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> </div> </div> <p class="title is-5 mathjax"> Learning Task Planning from Multi-Modal Demonstration for Multi-Stage Contact-Rich Manipulation </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Chen%2C+K">Kejia Chen</a>, <a href="/search/cs?searchtype=author&amp;query=Shen%2C+Z">Zheng Shen</a>, <a href="/search/cs?searchtype=author&amp;query=Zhang%2C+Y">Yue Zhang</a>, <a href="/search/cs?searchtype=author&amp;query=Chen%2C+L">Lingyun Chen</a>, <a href="/search/cs?searchtype=author&amp;query=Wu%2C+F">Fan Wu</a>, <a href="/search/cs?searchtype=author&amp;query=Bing%2C+Z">Zhenshan Bing</a>, <a href="/search/cs?searchtype=author&amp;query=Haddadin%2C+S">Sami Haddadin</a>, <a href="/search/cs?searchtype=author&amp;query=Knoll%2C+A">Alois Knoll</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2409.11863v1-abstract-short" style="display: inline;"> Large Language Models (LLMs) have gained popularity in task planning for long-horizon manipulation tasks. To enhance the validity of LLM-generated plans, visual demonstrations and online videos have been widely employed to guide the planning process. However, for manipulation tasks involving subtle movements but rich contact interactions, visual perception alone may be insufficient for the LLM to&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2409.11863v1-abstract-full').style.display = 'inline'; document.getElementById('2409.11863v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2409.11863v1-abstract-full" style="display: none;"> Large Language Models (LLMs) have gained popularity in task planning for long-horizon manipulation tasks. To enhance the validity of LLM-generated plans, visual demonstrations and online videos have been widely employed to guide the planning process. However, for manipulation tasks involving subtle movements but rich contact interactions, visual perception alone may be insufficient for the LLM to fully interpret the demonstration. Additionally, visual data provides limited information on force-related parameters and conditions, which are crucial for effective execution on real robots. In this paper, we introduce an in-context learning framework that incorporates tactile and force-torque information from human demonstrations to enhance LLMs&#39; ability to generate plans for new task scenarios. We propose a bootstrapped reasoning pipeline that sequentially integrates each modality into a comprehensive task plan. This task plan is then used as a reference for planning in new task configurations. Real-world experiments on two different sequential manipulation tasks demonstrate the effectiveness of our framework in improving LLMs&#39; understanding of multi-modal demonstrations and enhancing the overall planning performance. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2409.11863v1-abstract-full').style.display = 'none'; document.getElementById('2409.11863v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 18 September, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> September 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2409.11047">arXiv:2409.11047</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2409.11047">pdf</a>, <a href="https://arxiv.org/format/2409.11047">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Robotics">cs.RO</span> </div> </div> <p class="title is-5 mathjax"> TacDiffusion: Force-domain Diffusion Policy for Precise Tactile Manipulation </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Wu%2C+Y">Yansong Wu</a>, <a href="/search/cs?searchtype=author&amp;query=Chen%2C+Z">Zongxie Chen</a>, <a href="/search/cs?searchtype=author&amp;query=Wu%2C+F">Fan Wu</a>, <a href="/search/cs?searchtype=author&amp;query=Chen%2C+L">Lingyun Chen</a>, <a href="/search/cs?searchtype=author&amp;query=Zhang%2C+L">Liding Zhang</a>, <a href="/search/cs?searchtype=author&amp;query=Bing%2C+Z">Zhenshan Bing</a>, <a href="/search/cs?searchtype=author&amp;query=Swikir%2C+A">Abdalla Swikir</a>, <a href="/search/cs?searchtype=author&amp;query=Knoll%2C+A">Alois Knoll</a>, <a href="/search/cs?searchtype=author&amp;query=Haddadin%2C+S">Sami Haddadin</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2409.11047v1-abstract-short" style="display: inline;"> Assembly is a crucial skill for robots in both modern manufacturing and service robotics. However, mastering transferable insertion skills that can handle a variety of high-precision assembly tasks remains a significant challenge. This paper presents a novel framework that utilizes diffusion models to generate 6D wrench for high-precision tactile robotic insertion tasks. It learns from demonstrati&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2409.11047v1-abstract-full').style.display = 'inline'; document.getElementById('2409.11047v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2409.11047v1-abstract-full" style="display: none;"> Assembly is a crucial skill for robots in both modern manufacturing and service robotics. However, mastering transferable insertion skills that can handle a variety of high-precision assembly tasks remains a significant challenge. This paper presents a novel framework that utilizes diffusion models to generate 6D wrench for high-precision tactile robotic insertion tasks. It learns from demonstrations performed on a single task and achieves a zero-shot transfer success rate of 95.7% across various novel high-precision tasks. Our method effectively inherits the self-adaptability demonstrated by our previous work. In this framework, we address the frequency misalignment between the diffusion policy and the real-time control loop with a dynamic system-based filter, significantly improving the task success rate by 9.15%. Furthermore, we provide a practical guideline regarding the trade-off between diffusion models&#39; inference ability and speed. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2409.11047v1-abstract-full').style.display = 'none'; document.getElementById('2409.11047v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 17 September, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> September 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">7 pages</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2408.17222">arXiv:2408.17222</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2408.17222">pdf</a>, <a href="https://arxiv.org/format/2408.17222">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> How Could Generative AI Support Compliance with the EU AI Act? A Review for Safe Automated Driving Perception </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Keser%2C+M">Mert Keser</a>, <a href="/search/cs?searchtype=author&amp;query=Shoeb%2C+Y">Youssef Shoeb</a>, <a href="/search/cs?searchtype=author&amp;query=Knoll%2C+A">Alois Knoll</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2408.17222v1-abstract-short" style="display: inline;"> Deep Neural Networks (DNNs) have become central for the perception functions of autonomous vehicles, substantially enhancing their ability to understand and interpret the environment. However, these systems exhibit inherent limitations such as brittleness, opacity, and unpredictable behavior in out-of-distribution scenarios. The European Union (EU) Artificial Intelligence (AI) Act, as a pioneering&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2408.17222v1-abstract-full').style.display = 'inline'; document.getElementById('2408.17222v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2408.17222v1-abstract-full" style="display: none;"> Deep Neural Networks (DNNs) have become central for the perception functions of autonomous vehicles, substantially enhancing their ability to understand and interpret the environment. However, these systems exhibit inherent limitations such as brittleness, opacity, and unpredictable behavior in out-of-distribution scenarios. The European Union (EU) Artificial Intelligence (AI) Act, as a pioneering legislative framework, aims to address these challenges by establishing stringent norms and standards for AI systems, including those used in autonomous driving (AD), which are categorized as high-risk AI. In this work, we explore how the newly available generative AI models can potentially support addressing upcoming regulatory requirements in AD perception, particularly with respect to safety. This short review paper summarizes the requirements arising from the EU AI Act regarding DNN-based perception systems and systematically categorizes existing generative AI applications in AD. While generative AI models show promise in addressing some of the EU AI Acts requirements, such as transparency and robustness, this review examines their potential benefits and discusses how developers could leverage these methods to enhance compliance with the Act. The paper also highlights areas where further research is needed to ensure reliable and safe integration of these technologies. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2408.17222v1-abstract-full').style.display = 'none'; document.getElementById('2408.17222v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 30 August, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> August 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2408.15637">arXiv:2408.15637</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2408.15637">pdf</a>, <a href="https://arxiv.org/format/2408.15637">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> Transfer Learning from Simulated to Real Scenes for Monocular 3D Object Detection </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Mohamed%2C+S">Sondos Mohamed</a>, <a href="/search/cs?searchtype=author&amp;query=Zimmer%2C+W">Walter Zimmer</a>, <a href="/search/cs?searchtype=author&amp;query=Greer%2C+R">Ross Greer</a>, <a href="/search/cs?searchtype=author&amp;query=Ghita%2C+A+A">Ahmed Alaaeldin Ghita</a>, <a href="/search/cs?searchtype=author&amp;query=Castrill%C3%B3n-Santana%2C+M">Modesto Castrill贸n-Santana</a>, <a href="/search/cs?searchtype=author&amp;query=Trivedi%2C+M">Mohan Trivedi</a>, <a href="/search/cs?searchtype=author&amp;query=Knoll%2C+A">Alois Knoll</a>, <a href="/search/cs?searchtype=author&amp;query=Carta%2C+S+M">Salvatore Mario Carta</a>, <a href="/search/cs?searchtype=author&amp;query=Marras%2C+M">Mirko Marras</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2408.15637v1-abstract-short" style="display: inline;"> Accurately detecting 3D objects from monocular images in dynamic roadside scenarios remains a challenging problem due to varying camera perspectives and unpredictable scene conditions. This paper introduces a two-stage training strategy to address these challenges. Our approach initially trains a model on the large-scale synthetic dataset, RoadSense3D, which offers a diverse range of scenarios for&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2408.15637v1-abstract-full').style.display = 'inline'; document.getElementById('2408.15637v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2408.15637v1-abstract-full" style="display: none;"> Accurately detecting 3D objects from monocular images in dynamic roadside scenarios remains a challenging problem due to varying camera perspectives and unpredictable scene conditions. This paper introduces a two-stage training strategy to address these challenges. Our approach initially trains a model on the large-scale synthetic dataset, RoadSense3D, which offers a diverse range of scenarios for robust feature learning. Subsequently, we fine-tune the model on a combination of real-world datasets to enhance its adaptability to practical conditions. Experimental results of the Cube R-CNN model on challenging public benchmarks show a remarkable improvement in detection performance, with a mean average precision rising from 0.26 to 12.76 on the TUM Traffic A9 Highway dataset and from 2.09 to 6.60 on the DAIR-V2X-I dataset when performing transfer learning. Code, data, and qualitative video results are available on the project website: https://roadsense3d.github.io. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2408.15637v1-abstract-full').style.display = 'none'; document.getElementById('2408.15637v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 28 August, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> August 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">18 pages. Accepted for ECVA European Conference on Computer Vision 2024 (ECCV&#39;24)</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2408.09675">arXiv:2408.09675</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2408.09675">pdf</a>, <a href="https://arxiv.org/format/2408.09675">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Multiagent Systems">cs.MA</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Robotics">cs.RO</span> </div> </div> <p class="title is-5 mathjax"> Multi-Agent Reinforcement Learning for Autonomous Driving: A Survey </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Zhang%2C+R">Ruiqi Zhang</a>, <a href="/search/cs?searchtype=author&amp;query=Hou%2C+J">Jing Hou</a>, <a href="/search/cs?searchtype=author&amp;query=Walter%2C+F">Florian Walter</a>, <a href="/search/cs?searchtype=author&amp;query=Gu%2C+S">Shangding Gu</a>, <a href="/search/cs?searchtype=author&amp;query=Guan%2C+J">Jiayi Guan</a>, <a href="/search/cs?searchtype=author&amp;query=R%C3%B6hrbein%2C+F">Florian R枚hrbein</a>, <a href="/search/cs?searchtype=author&amp;query=Du%2C+Y">Yali Du</a>, <a href="/search/cs?searchtype=author&amp;query=Cai%2C+P">Panpan Cai</a>, <a href="/search/cs?searchtype=author&amp;query=Chen%2C+G">Guang Chen</a>, <a href="/search/cs?searchtype=author&amp;query=Knoll%2C+A">Alois Knoll</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2408.09675v1-abstract-short" style="display: inline;"> Reinforcement Learning (RL) is a potent tool for sequential decision-making and has achieved performance surpassing human capabilities across many challenging real-world tasks. As the extension of RL in the multi-agent system domain, multi-agent RL (MARL) not only need to learn the control policy but also requires consideration regarding interactions with all other agents in the environment, mutua&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2408.09675v1-abstract-full').style.display = 'inline'; document.getElementById('2408.09675v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2408.09675v1-abstract-full" style="display: none;"> Reinforcement Learning (RL) is a potent tool for sequential decision-making and has achieved performance surpassing human capabilities across many challenging real-world tasks. As the extension of RL in the multi-agent system domain, multi-agent RL (MARL) not only need to learn the control policy but also requires consideration regarding interactions with all other agents in the environment, mutual influences among different system components, and the distribution of computational resources. This augments the complexity of algorithmic design and poses higher requirements on computational resources. Simultaneously, simulators are crucial to obtain realistic data, which is the fundamentals of RL. In this paper, we first propose a series of metrics of simulators and summarize the features of existing benchmarks. Second, to ease comprehension, we recall the foundational knowledge and then synthesize the recently advanced studies of MARL-related autonomous driving and intelligent transportation systems. Specifically, we examine their environmental modeling, state representation, perception units, and algorithm design. Conclusively, we discuss open challenges as well as prospects and opportunities. We hope this paper can help the researchers integrate MARL technologies and trigger more insightful ideas toward the intelligent and autonomous driving. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2408.09675v1-abstract-full').style.display = 'none'; document.getElementById('2408.09675v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 18 August, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> August 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">23 pages, 6 figures and 2 tables. Submitted to IEEE Journal</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2407.20818">arXiv:2407.20818</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2407.20818">pdf</a>, <a href="https://arxiv.org/format/2407.20818">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> WARM-3D: A Weakly-Supervised Sim2Real Domain Adaptation Framework for Roadside Monocular 3D Object Detection </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Zhou%2C+X">Xingcheng Zhou</a>, <a href="/search/cs?searchtype=author&amp;query=Fu%2C+D">Deyu Fu</a>, <a href="/search/cs?searchtype=author&amp;query=Zimmer%2C+W">Walter Zimmer</a>, <a href="/search/cs?searchtype=author&amp;query=Liu%2C+M">Mingyu Liu</a>, <a href="/search/cs?searchtype=author&amp;query=Lakshminarasimhan%2C+V">Venkatnarayanan Lakshminarasimhan</a>, <a href="/search/cs?searchtype=author&amp;query=Strand%2C+L">Leah Strand</a>, <a href="/search/cs?searchtype=author&amp;query=Knoll%2C+A+C">Alois C. Knoll</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2407.20818v1-abstract-short" style="display: inline;"> Existing roadside perception systems are limited by the absence of publicly available, large-scale, high-quality 3D datasets. Exploring the use of cost-effective, extensive synthetic datasets offers a viable solution to tackle this challenge and enhance the performance of roadside monocular 3D detection. In this study, we introduce the TUMTraf Synthetic Dataset, offering a diverse and substantial&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2407.20818v1-abstract-full').style.display = 'inline'; document.getElementById('2407.20818v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2407.20818v1-abstract-full" style="display: none;"> Existing roadside perception systems are limited by the absence of publicly available, large-scale, high-quality 3D datasets. Exploring the use of cost-effective, extensive synthetic datasets offers a viable solution to tackle this challenge and enhance the performance of roadside monocular 3D detection. In this study, we introduce the TUMTraf Synthetic Dataset, offering a diverse and substantial collection of high-quality 3D data to augment scarce real-world datasets. Besides, we present WARM-3D, a concise yet effective framework to aid the Sim2Real domain transfer for roadside monocular 3D detection. Our method leverages cheap synthetic datasets and 2D labels from an off-the-shelf 2D detector for weak supervision. We show that WARM-3D significantly enhances performance, achieving a +12.40% increase in mAP 3D over the baseline with only pseudo-2D supervision. With 2D GT as weak labels, WARM-3D even reaches performance close to the Oracle baseline. Moreover, WARM-3D improves the ability of 3D detectors to unseen sample recognition across various real-world environments, highlighting its potential for practical applications. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2407.20818v1-abstract-full').style.display = 'none'; document.getElementById('2407.20818v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 30 July, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> July 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2407.17348">arXiv:2407.17348</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2407.17348">pdf</a>, <a href="https://arxiv.org/format/2407.17348">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Robotics">cs.RO</span> </div> </div> <p class="title is-5 mathjax"> DexGANGrasp: Dexterous Generative Adversarial Grasping Synthesis for Task-Oriented Manipulation </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Feng%2C+Q">Qian Feng</a>, <a href="/search/cs?searchtype=author&amp;query=Lema%2C+D+S+M">David S. Martinez Lema</a>, <a href="/search/cs?searchtype=author&amp;query=Malmir%2C+M">Mohammadhossein Malmir</a>, <a href="/search/cs?searchtype=author&amp;query=Li%2C+H">Hang Li</a>, <a href="/search/cs?searchtype=author&amp;query=Feng%2C+J">Jianxiang Feng</a>, <a href="/search/cs?searchtype=author&amp;query=Chen%2C+Z">Zhaopeng Chen</a>, <a href="/search/cs?searchtype=author&amp;query=Knoll%2C+A">Alois Knoll</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2407.17348v2-abstract-short" style="display: inline;"> We introduce DexGanGrasp, a dexterous grasping synthesis method that generates and evaluates grasps with single view in real time. DexGanGrasp comprises a Conditional Generative Adversarial Networks (cGANs)-based DexGenerator to generate dexterous grasps and a discriminator-like DexEvalautor to assess the stability of these grasps. Extensive simulation and real-world expriments showcases the effec&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2407.17348v2-abstract-full').style.display = 'inline'; document.getElementById('2407.17348v2-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2407.17348v2-abstract-full" style="display: none;"> We introduce DexGanGrasp, a dexterous grasping synthesis method that generates and evaluates grasps with single view in real time. DexGanGrasp comprises a Conditional Generative Adversarial Networks (cGANs)-based DexGenerator to generate dexterous grasps and a discriminator-like DexEvalautor to assess the stability of these grasps. Extensive simulation and real-world expriments showcases the effectiveness of our proposed method, outperforming the baseline FFHNet with an 18.57% higher success rate in real-world evaluation. We further extend DexGanGrasp to DexAfford-Prompt, an open-vocabulary affordance grounding pipeline for dexterous grasping leveraging Multimodal Large Language Models (MLLMs) and Vision Language Models (VLMs), to achieve task-oriented grasping with successful real-world deployments. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2407.17348v2-abstract-full').style.display = 'none'; document.getElementById('2407.17348v2-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 25 November, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 24 July, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> July 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">8 pages, 4 figures</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2407.15161">arXiv:2407.15161</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2407.15161">pdf</a>, <a href="https://arxiv.org/format/2407.15161">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Robotics">cs.RO</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> FFHFlow: A Flow-based Variational Approach for Learning Diverse Dexterous Grasps with Shape-Aware Introspection </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Feng%2C+Q">Qian Feng</a>, <a href="/search/cs?searchtype=author&amp;query=Feng%2C+J">Jianxiang Feng</a>, <a href="/search/cs?searchtype=author&amp;query=Chen%2C+Z">Zhaopeng Chen</a>, <a href="/search/cs?searchtype=author&amp;query=Triebel%2C+R">Rudolph Triebel</a>, <a href="/search/cs?searchtype=author&amp;query=Knoll%2C+A">Alois Knoll</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2407.15161v2-abstract-short" style="display: inline;"> Synthesizing diverse dexterous grasps from uncertain partial observation is an important yet challenging task for physically intelligent embodiments. Previous works on generative grasp synthesis fell short of precisely capturing the complex grasp distribution and reasoning about shape uncertainty in the unstructured and often partially perceived reality. In this work, we introduce a novel model th&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2407.15161v2-abstract-full').style.display = 'inline'; document.getElementById('2407.15161v2-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2407.15161v2-abstract-full" style="display: none;"> Synthesizing diverse dexterous grasps from uncertain partial observation is an important yet challenging task for physically intelligent embodiments. Previous works on generative grasp synthesis fell short of precisely capturing the complex grasp distribution and reasoning about shape uncertainty in the unstructured and often partially perceived reality. In this work, we introduce a novel model that can generate diverse grasps for a multi-fingered hand while introspectively handling perceptual uncertainty and recognizing unknown object geometry to avoid performance degradation. Specifically, we devise a Deep Latent Variable Model (DLVM) based on Normalizing Flows (NFs), facilitating hierarchical and expressive latent representation for modeling versatile grasps. Our model design counteracts typical pitfalls of its popular alternative in generative grasping, i.e., conditional Variational Autoencoders (cVAEs) whose performance is limited by mode collapse and miss-specified prior issues. Moreover, the resultant feature hierarchy and the exact flow likelihood computation endow our model with shape-aware introspective capabilities, enabling it to quantify the shape uncertainty of partial point clouds and detect objects of novel geometry. We further achieve performance gain by fusing this information with a discriminative grasp evaluator, facilitating a novel hybrid way for grasp evaluation. Comprehensive simulated and real-world experiments show that the proposed idea gains superior performance and higher run-time efficiency against strong baselines, including diffusion models. We also demonstrate substantial benefits of greater diversity for grasping objects in clutter and a confined workspace in the real world. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2407.15161v2-abstract-full').style.display = 'none'; document.getElementById('2407.15161v2-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 18 December, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 21 July, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> July 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">First two authors contributed equally, whose ordering decided via coin-tossing. Under Reivew</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2407.12582">arXiv:2407.12582</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2407.12582">pdf</a>, <a href="https://arxiv.org/format/2407.12582">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Robotics">cs.RO</span> </div> </div> <p class="title is-5 mathjax"> Embracing Events and Frames with Hierarchical Feature Refinement Network for Object Detection </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Cao%2C+H">Hu Cao</a>, <a href="/search/cs?searchtype=author&amp;query=Zhang%2C+Z">Zehua Zhang</a>, <a href="/search/cs?searchtype=author&amp;query=Xia%2C+Y">Yan Xia</a>, <a href="/search/cs?searchtype=author&amp;query=Li%2C+X">Xinyi Li</a>, <a href="/search/cs?searchtype=author&amp;query=Xia%2C+J">Jiahao Xia</a>, <a href="/search/cs?searchtype=author&amp;query=Chen%2C+G">Guang Chen</a>, <a href="/search/cs?searchtype=author&amp;query=Knoll%2C+A">Alois Knoll</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2407.12582v2-abstract-short" style="display: inline;"> In frame-based vision, object detection faces substantial performance degradation under challenging conditions due to the limited sensing capability of conventional cameras. Event cameras output sparse and asynchronous events, providing a potential solution to solve these problems. However, effectively fusing two heterogeneous modalities remains an open issue. In this work, we propose a novel hier&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2407.12582v2-abstract-full').style.display = 'inline'; document.getElementById('2407.12582v2-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2407.12582v2-abstract-full" style="display: none;"> In frame-based vision, object detection faces substantial performance degradation under challenging conditions due to the limited sensing capability of conventional cameras. Event cameras output sparse and asynchronous events, providing a potential solution to solve these problems. However, effectively fusing two heterogeneous modalities remains an open issue. In this work, we propose a novel hierarchical feature refinement network for event-frame fusion. The core concept is the design of the coarse-to-fine fusion module, denoted as the cross-modality adaptive feature refinement (CAFR) module. In the initial phase, the bidirectional cross-modality interaction (BCI) part facilitates information bridging from two distinct sources. Subsequently, the features are further refined by aligning the channel-level mean and variance in the two-fold adaptive feature refinement (TAFR) part. We conducted extensive experiments on two benchmarks: the low-resolution PKU-DDD17-Car dataset and the high-resolution DSEC dataset. Experimental results show that our method surpasses the state-of-the-art by an impressive margin of $\textbf{8.0}\%$ on the DSEC dataset. Besides, our method exhibits significantly better robustness (\textbf{69.5}\% versus \textbf{38.7}\%) when introducing 15 different corruption types to the frame images. The code can be found at the link (https://github.com/HuCaoFighting/FRN). <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2407.12582v2-abstract-full').style.display = 'none'; document.getElementById('2407.12582v2-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 31 October, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 17 July, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> July 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Accepted by ECCV 2024</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2407.12387">arXiv:2407.12387</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2407.12387">pdf</a>, <a href="https://arxiv.org/format/2407.12387">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> HGL: Hierarchical Geometry Learning for Test-time Adaptation in 3D Point Cloud Segmentation </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Zou%2C+T">Tianpei Zou</a>, <a href="/search/cs?searchtype=author&amp;query=Qu%2C+S">Sanqing Qu</a>, <a href="/search/cs?searchtype=author&amp;query=Li%2C+Z">Zhijun Li</a>, <a href="/search/cs?searchtype=author&amp;query=Knoll%2C+A">Alois Knoll</a>, <a href="/search/cs?searchtype=author&amp;query=He%2C+L">Lianghua He</a>, <a href="/search/cs?searchtype=author&amp;query=Chen%2C+G">Guang Chen</a>, <a href="/search/cs?searchtype=author&amp;query=Jiang%2C+C">Changjun Jiang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2407.12387v1-abstract-short" style="display: inline;"> 3D point cloud segmentation has received significant interest for its growing applications. However, the generalization ability of models suffers in dynamic scenarios due to the distribution shift between test and training data. To promote robustness and adaptability across diverse scenarios, test-time adaptation (TTA) has recently been introduced. Nevertheless, most existing TTA methods are devel&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2407.12387v1-abstract-full').style.display = 'inline'; document.getElementById('2407.12387v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2407.12387v1-abstract-full" style="display: none;"> 3D point cloud segmentation has received significant interest for its growing applications. However, the generalization ability of models suffers in dynamic scenarios due to the distribution shift between test and training data. To promote robustness and adaptability across diverse scenarios, test-time adaptation (TTA) has recently been introduced. Nevertheless, most existing TTA methods are developed for images, and limited approaches applicable to point clouds ignore the inherent hierarchical geometric structures in point cloud streams, i.e., local (point-level), global (object-level), and temporal (frame-level) structures. In this paper, we delve into TTA in 3D point cloud segmentation and propose a novel Hierarchical Geometry Learning (HGL) framework. HGL comprises three complementary modules from local, global to temporal learning in a bottom-up manner.Technically, we first construct a local geometry learning module for pseudo-label generation. Next, we build prototypes from the global geometry perspective for pseudo-label fine-tuning. Furthermore, we introduce a temporal consistency regularization module to mitigate negative transfer. Extensive experiments on four datasets demonstrate the effectiveness and superiority of our HGL. Remarkably, on the SynLiDAR to SemanticKITTI task, HGL achieves an overall mIoU of 46.91\%, improving GIPSO by 3.0\% and significantly reducing the required adaptation time by 80\%. The code is available at https://github.com/tpzou/HGL. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2407.12387v1-abstract-full').style.display = 'none'; document.getElementById('2407.12387v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 17 July, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> July 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Journal ref:</span> ECCV 2024 </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2407.00451">arXiv:2407.00451</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2407.00451">pdf</a>, <a href="https://arxiv.org/format/2407.00451">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Robotics">cs.RO</span> </div> </div> <p class="title is-5 mathjax"> Language-Guided Object-Centric Diffusion Policy for Collision-Aware Robotic Manipulation </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Li%2C+H">Hang Li</a>, <a href="/search/cs?searchtype=author&amp;query=Feng%2C+Q">Qian Feng</a>, <a href="/search/cs?searchtype=author&amp;query=Zheng%2C+Z">Zhi Zheng</a>, <a href="/search/cs?searchtype=author&amp;query=Feng%2C+J">Jianxiang Feng</a>, <a href="/search/cs?searchtype=author&amp;query=Knoll%2C+A">Alois Knoll</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2407.00451v2-abstract-short" style="display: inline;"> Learning from demonstrations faces challenges in generalizing beyond the training data and is fragile even to slight visual variations. To tackle this problem, we introduce Lan-o3dp, a language guided object centric diffusion policy that takes 3d representation of task relevant objects as conditional input and can be guided by cost function for safety constraints at inference time. Lan-o3dp enable&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2407.00451v2-abstract-full').style.display = 'inline'; document.getElementById('2407.00451v2-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2407.00451v2-abstract-full" style="display: none;"> Learning from demonstrations faces challenges in generalizing beyond the training data and is fragile even to slight visual variations. To tackle this problem, we introduce Lan-o3dp, a language guided object centric diffusion policy that takes 3d representation of task relevant objects as conditional input and can be guided by cost function for safety constraints at inference time. Lan-o3dp enables strong generalization in various aspects, such as background changes, visual ambiguity and can avoid novel obstacles that are unseen during the demonstration process. Specifically, We first train a diffusion policy conditioned on point clouds of target objects and then harness a large language model to decompose the user instruction into task related units consisting of target objects and obstacles, which can be used as visual observation for the policy network or converted to a cost function, guiding the generation of trajectory towards collision free region at test time. Our proposed method shows training efficiency and higher success rates compared with the baselines in simulation experiments. In real world experiments, our method exhibits strong generalization performance towards unseen instances, cluttered scenes, scenes of multiple similar objects and demonstrates training free capability of obstacle avoidance. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2407.00451v2-abstract-full').style.display = 'none'; document.getElementById('2407.00451v2-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 4 July, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 29 June, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> July 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">11 pages, 8 figures</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2406.03229">arXiv:2406.03229</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2406.03229">pdf</a>, <a href="https://arxiv.org/format/2406.03229">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> Global Clipper: Enhancing Safety and Reliability of Transformer-based Object Detection Models </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Sha%2C+Q+S">Qutub Syed Sha</a>, <a href="/search/cs?searchtype=author&amp;query=Paulitsch%2C+M">Michael Paulitsch</a>, <a href="/search/cs?searchtype=author&amp;query=Pattabiraman%2C+K">Karthik Pattabiraman</a>, <a href="/search/cs?searchtype=author&amp;query=Hagn%2C+K">Korbinian Hagn</a>, <a href="/search/cs?searchtype=author&amp;query=Oboril%2C+F">Fabian Oboril</a>, <a href="/search/cs?searchtype=author&amp;query=Buerkle%2C+C">Cornelius Buerkle</a>, <a href="/search/cs?searchtype=author&amp;query=Scholl%2C+K">Kay-Ulrich Scholl</a>, <a href="/search/cs?searchtype=author&amp;query=Hinz%2C+G">Gereon Hinz</a>, <a href="/search/cs?searchtype=author&amp;query=Knoll%2C+A">Alois Knoll</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2406.03229v4-abstract-short" style="display: inline;"> As transformer-based object detection models progress, their impact in critical sectors like autonomous vehicles and aviation is expected to grow. Soft errors causing bit flips during inference have significantly impacted DNN performance, altering predictions. Traditional range restriction solutions for CNNs fall short for transformers. This study introduces the Global Clipper and Global Hybrid Cl&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2406.03229v4-abstract-full').style.display = 'inline'; document.getElementById('2406.03229v4-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2406.03229v4-abstract-full" style="display: none;"> As transformer-based object detection models progress, their impact in critical sectors like autonomous vehicles and aviation is expected to grow. Soft errors causing bit flips during inference have significantly impacted DNN performance, altering predictions. Traditional range restriction solutions for CNNs fall short for transformers. This study introduces the Global Clipper and Global Hybrid Clipper, effective mitigation strategies specifically designed for transformer-based models. It significantly enhances their resilience to soft errors and reduces faulty inferences to ~ 0\%. We also detail extensive testing across over 64 scenarios involving two transformer models (DINO-DETR and Lite-DETR) and two CNN models (YOLOv3 and SSD) using three datasets, totalling approximately 3.3 million inferences, to assess model robustness comprehensively. Moreover, the paper explores unique aspects of attention blocks in transformers and their operational differences from CNNs. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2406.03229v4-abstract-full').style.display = 'none'; document.getElementById('2406.03229v4-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 9 July, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 5 June, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> June 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Accepted at IJCAI-AISafety&#39;24 Workshop</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2406.03188">arXiv:2406.03188</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2406.03188">pdf</a>, <a href="https://arxiv.org/format/2406.03188">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> </div> </div> <p class="title is-5 mathjax"> Situation Monitor: Diversity-Driven Zero-Shot Out-of-Distribution Detection using Budding Ensemble Architecture for Object Detection </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Syed%2C+Q">Qutub Syed</a>, <a href="/search/cs?searchtype=author&amp;query=Paulitsch%2C+M">Michael Paulitsch</a>, <a href="/search/cs?searchtype=author&amp;query=Hagn%2C+K">Korbinian Hagn</a>, <a href="/search/cs?searchtype=author&amp;query=Cihangir%2C+N+K">Neslihan Kose Cihangir</a>, <a href="/search/cs?searchtype=author&amp;query=Scholl%2C+K">Kay-Ulrich Scholl</a>, <a href="/search/cs?searchtype=author&amp;query=Oboril%2C+F">Fabian Oboril</a>, <a href="/search/cs?searchtype=author&amp;query=Hinz%2C+G">Gereon Hinz</a>, <a href="/search/cs?searchtype=author&amp;query=Knoll%2C+A">Alois Knoll</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2406.03188v1-abstract-short" style="display: inline;"> We introduce Situation Monitor, a novel zero-shot Out-of-Distribution (OOD) detection approach for transformer-based object detection models to enhance reliability in safety-critical machine learning applications such as autonomous driving. The Situation Monitor utilizes the Diversity-based Budding Ensemble Architecture (DBEA) and increases the OOD performance by integrating a diversity loss into&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2406.03188v1-abstract-full').style.display = 'inline'; document.getElementById('2406.03188v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2406.03188v1-abstract-full" style="display: none;"> We introduce Situation Monitor, a novel zero-shot Out-of-Distribution (OOD) detection approach for transformer-based object detection models to enhance reliability in safety-critical machine learning applications such as autonomous driving. The Situation Monitor utilizes the Diversity-based Budding Ensemble Architecture (DBEA) and increases the OOD performance by integrating a diversity loss into the training process on top of the budding ensemble architecture, detecting Far-OOD samples and minimizing false positives on Near-OOD samples. Moreover, utilizing the resulting DBEA increases the model&#39;s OOD performance and improves the calibration of confidence scores, particularly concerning the intersection over union of the detected objects. The DBEA model achieves these advancements with a 14% reduction in trainable parameters compared to the vanilla model. This signifies a substantial improvement in efficiency without compromising the model&#39;s ability to detect OOD instances and calibrate the confidence scores accurately. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2406.03188v1-abstract-full').style.display = 'none'; document.getElementById('2406.03188v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 5 June, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> June 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Paper accepted at CVPR SAIAD Workshop</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2406.00430">arXiv:2406.00430</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2406.00430">pdf</a>, <a href="https://arxiv.org/format/2406.00430">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Robotics">cs.RO</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> </div> </div> <p class="title is-5 mathjax"> Evaluating Uncertainty-based Failure Detection for Closed-Loop LLM Planners </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Zheng%2C+Z">Zhi Zheng</a>, <a href="/search/cs?searchtype=author&amp;query=Feng%2C+Q">Qian Feng</a>, <a href="/search/cs?searchtype=author&amp;query=Li%2C+H">Hang Li</a>, <a href="/search/cs?searchtype=author&amp;query=Knoll%2C+A">Alois Knoll</a>, <a href="/search/cs?searchtype=author&amp;query=Feng%2C+J">Jianxiang Feng</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2406.00430v1-abstract-short" style="display: inline;"> Recently, Large Language Models (LLMs) have witnessed remarkable performance as zero-shot task planners for robotic manipulation tasks. However, the open-loop nature of previous works makes LLM-based planning error-prone and fragile. On the other hand, failure detection approaches for closed-loop planning are often limited by task-specific heuristics or following an unrealistic assumption that the&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2406.00430v1-abstract-full').style.display = 'inline'; document.getElementById('2406.00430v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2406.00430v1-abstract-full" style="display: none;"> Recently, Large Language Models (LLMs) have witnessed remarkable performance as zero-shot task planners for robotic manipulation tasks. However, the open-loop nature of previous works makes LLM-based planning error-prone and fragile. On the other hand, failure detection approaches for closed-loop planning are often limited by task-specific heuristics or following an unrealistic assumption that the prediction is trustworthy all the time. As a general-purpose reasoning machine, LLMs or Multimodal Large Language Models (MLLMs) are promising for detecting failures. However, However, the appropriateness of the aforementioned assumption diminishes due to the notorious hullucination problem. In this work, we attempt to mitigate these issues by introducing a framework for closed-loop LLM-based planning called KnowLoop, backed by an uncertainty-based MLLMs failure detector, which is agnostic to any used MLLMs or LLMs. Specifically, we evaluate three different ways for quantifying the uncertainty of MLLMs, namely token probability, entropy, and self-explained confidence as primary metrics based on three carefully designed representative prompting strategies. With a self-collected dataset including various manipulation tasks and an LLM-based robot system, our experiments demonstrate that token probability and entropy are more reflective compared to self-explained confidence. By setting an appropriate threshold to filter out uncertain predictions and seek human help actively, the accuracy of failure detection can be significantly enhanced. This improvement boosts the effectiveness of closed-loop planning and the overall success rate of tasks. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2406.00430v1-abstract-full').style.display = 'none'; document.getElementById('2406.00430v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 1 June, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> June 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Accepted at ICRA 2024 Workshop on Back to the Future: Robot Learning Going Probabilistic. Website: https://sites.google.com/view/konwloop/home</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2405.20860">arXiv:2405.20860</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2405.20860">pdf</a>, <a href="https://arxiv.org/format/2405.20860">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> Enhancing Efficiency of Safe Reinforcement Learning via Sample Manipulation </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Gu%2C+S">Shangding Gu</a>, <a href="/search/cs?searchtype=author&amp;query=Shi%2C+L">Laixi Shi</a>, <a href="/search/cs?searchtype=author&amp;query=Ding%2C+Y">Yuhao Ding</a>, <a href="/search/cs?searchtype=author&amp;query=Knoll%2C+A">Alois Knoll</a>, <a href="/search/cs?searchtype=author&amp;query=Spanos%2C+C">Costas Spanos</a>, <a href="/search/cs?searchtype=author&amp;query=Wierman%2C+A">Adam Wierman</a>, <a href="/search/cs?searchtype=author&amp;query=Jin%2C+M">Ming Jin</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2405.20860v1-abstract-short" style="display: inline;"> Safe reinforcement learning (RL) is crucial for deploying RL agents in real-world applications, as it aims to maximize long-term rewards while satisfying safety constraints. However, safe RL often suffers from sample inefficiency, requiring extensive interactions with the environment to learn a safe policy. We propose Efficient Safe Policy Optimization (ESPO), a novel approach that enhances the ef&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2405.20860v1-abstract-full').style.display = 'inline'; document.getElementById('2405.20860v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2405.20860v1-abstract-full" style="display: none;"> Safe reinforcement learning (RL) is crucial for deploying RL agents in real-world applications, as it aims to maximize long-term rewards while satisfying safety constraints. However, safe RL often suffers from sample inefficiency, requiring extensive interactions with the environment to learn a safe policy. We propose Efficient Safe Policy Optimization (ESPO), a novel approach that enhances the efficiency of safe RL through sample manipulation. ESPO employs an optimization framework with three modes: maximizing rewards, minimizing costs, and balancing the trade-off between the two. By dynamically adjusting the sampling process based on the observed conflict between reward and safety gradients, ESPO theoretically guarantees convergence, optimization stability, and improved sample complexity bounds. Experiments on the Safety-MuJoCo and Omnisafe benchmarks demonstrate that ESPO significantly outperforms existing primal-based and primal-dual-based baselines in terms of reward maximization and constraint satisfaction. Moreover, ESPO achieves substantial gains in sample efficiency, requiring 25--29% fewer samples than baselines, and reduces training time by 21--38%. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2405.20860v1-abstract-full').style.display = 'none'; document.getElementById('2405.20860v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 31 May, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> May 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2405.16390">arXiv:2405.16390</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2405.16390">pdf</a>, <a href="https://arxiv.org/format/2405.16390">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> Safe and Balanced: A Framework for Constrained Multi-Objective Reinforcement Learning </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Gu%2C+S">Shangding Gu</a>, <a href="/search/cs?searchtype=author&amp;query=Sel%2C+B">Bilgehan Sel</a>, <a href="/search/cs?searchtype=author&amp;query=Ding%2C+Y">Yuhao Ding</a>, <a href="/search/cs?searchtype=author&amp;query=Wang%2C+L">Lu Wang</a>, <a href="/search/cs?searchtype=author&amp;query=Lin%2C+Q">Qingwei Lin</a>, <a href="/search/cs?searchtype=author&amp;query=Knoll%2C+A">Alois Knoll</a>, <a href="/search/cs?searchtype=author&amp;query=Jin%2C+M">Ming Jin</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2405.16390v1-abstract-short" style="display: inline;"> In numerous reinforcement learning (RL) problems involving safety-critical systems, a key challenge lies in balancing multiple objectives while simultaneously meeting all stringent safety constraints. To tackle this issue, we propose a primal-based framework that orchestrates policy optimization between multi-objective learning and constraint adherence. Our method employs a novel natural policy gr&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2405.16390v1-abstract-full').style.display = 'inline'; document.getElementById('2405.16390v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2405.16390v1-abstract-full" style="display: none;"> In numerous reinforcement learning (RL) problems involving safety-critical systems, a key challenge lies in balancing multiple objectives while simultaneously meeting all stringent safety constraints. To tackle this issue, we propose a primal-based framework that orchestrates policy optimization between multi-objective learning and constraint adherence. Our method employs a novel natural policy gradient manipulation method to optimize multiple RL objectives and overcome conflicting gradients between different tasks, since the simple weighted average gradient direction may not be beneficial for specific tasks&#39; performance due to misaligned gradients of different task objectives. When there is a violation of a hard constraint, our algorithm steps in to rectify the policy to minimize this violation. We establish theoretical convergence and constraint violation guarantees in a tabular setting. Empirically, our proposed method also outperforms prior state-of-the-art methods on challenging safe multi-objective reinforcement learning tasks. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2405.16390v1-abstract-full').style.display = 'none'; document.getElementById('2405.16390v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 25 May, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> May 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2405.11955">arXiv:2405.11955</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2405.11955">pdf</a>, <a href="https://arxiv.org/format/2405.11955">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Plasma Physics">physics.plasm-ph</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Pattern Formation and Solitons">nlin.PS</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computational Physics">physics.comp-ph</span> </div> </div> <p class="title is-5 mathjax"> Shallow Recurrent Decoder for Reduced Order Modeling of Plasma Dynamics </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Kutz%2C+J+N">J. Nathan Kutz</a>, <a href="/search/cs?searchtype=author&amp;query=Reza%2C+M">Maryam Reza</a>, <a href="/search/cs?searchtype=author&amp;query=Faraji%2C+F">Farbod Faraji</a>, <a href="/search/cs?searchtype=author&amp;query=Knoll%2C+A">Aaron Knoll</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2405.11955v1-abstract-short" style="display: inline;"> Reduced order models are becoming increasingly important for rendering complex and multiscale spatio-temporal dynamics computationally tractable. The computational efficiency of such surrogate models is especially important for design, exhaustive exploration and physical understanding. Plasma simulations, in particular those applied to the study of ${\bf E}\times {\bf B}$ plasma discharges and tec&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2405.11955v1-abstract-full').style.display = 'inline'; document.getElementById('2405.11955v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2405.11955v1-abstract-full" style="display: none;"> Reduced order models are becoming increasingly important for rendering complex and multiscale spatio-temporal dynamics computationally tractable. The computational efficiency of such surrogate models is especially important for design, exhaustive exploration and physical understanding. Plasma simulations, in particular those applied to the study of ${\bf E}\times {\bf B}$ plasma discharges and technologies, such as Hall thrusters, require substantial computational resources in order to resolve the multidimentional dynamics that span across wide spatial and temporal scales. Although high-fidelity computational tools are available to simulate such systems over limited conditions and in highly simplified geometries, simulations of full-size systems and/or extensive parametric studies over many geometric configurations and under different physical conditions are computationally intractable with conventional numerical tools. Thus, scientific studies and industrially oriented modeling of plasma systems, including the important ${\bf E}\times {\bf B}$ technologies, stand to significantly benefit from reduced order modeling algorithms. We develop a model reduction scheme based upon a {\em Shallow REcurrent Decoder} (SHRED) architecture. The scheme uses a neural network for encoding limited sensor measurements in time (sequence-to-sequence encoding) to full state-space reconstructions via a decoder network. Based upon the theory of separation of variables, the SHRED architecture is capable of (i) reconstructing full spatio-temporal fields with as little as three point sensors, even the fields that are not measured with sensor feeds but that are in dynamic coupling with the measured field, and (ii) forecasting the future state of the system using neural network roll-outs from the trained time encoding model. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2405.11955v1-abstract-full').style.display = 'none'; document.getElementById('2405.11955v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 20 May, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> May 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">12 pages, 7 figures</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2405.06782">arXiv:2405.06782</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2405.06782">pdf</a>, <a href="https://arxiv.org/format/2405.06782">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> GraphRelate3D: Context-Dependent 3D Object Detection with Inter-Object Relationship Graphs </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Liu%2C+M">Mingyu Liu</a>, <a href="/search/cs?searchtype=author&amp;query=Yurtsever%2C+E">Ekim Yurtsever</a>, <a href="/search/cs?searchtype=author&amp;query=Brede%2C+M">Marc Brede</a>, <a href="/search/cs?searchtype=author&amp;query=Meng%2C+J">Jun Meng</a>, <a href="/search/cs?searchtype=author&amp;query=Zimmer%2C+W">Walter Zimmer</a>, <a href="/search/cs?searchtype=author&amp;query=Zhou%2C+X">Xingcheng Zhou</a>, <a href="/search/cs?searchtype=author&amp;query=Zagar%2C+B+L">Bare Luka Zagar</a>, <a href="/search/cs?searchtype=author&amp;query=Cui%2C+Y">Yuning Cui</a>, <a href="/search/cs?searchtype=author&amp;query=Knoll%2C+A">Alois Knoll</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2405.06782v1-abstract-short" style="display: inline;"> Accurate and effective 3D object detection is critical for ensuring the driving safety of autonomous vehicles. Recently, state-of-the-art two-stage 3D object detectors have exhibited promising performance. However, these methods refine proposals individually, ignoring the rich contextual information in the object relationships between the neighbor proposals. In this study, we introduce an object r&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2405.06782v1-abstract-full').style.display = 'inline'; document.getElementById('2405.06782v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2405.06782v1-abstract-full" style="display: none;"> Accurate and effective 3D object detection is critical for ensuring the driving safety of autonomous vehicles. Recently, state-of-the-art two-stage 3D object detectors have exhibited promising performance. However, these methods refine proposals individually, ignoring the rich contextual information in the object relationships between the neighbor proposals. In this study, we introduce an object relation module, consisting of a graph generator and a graph neural network (GNN), to learn the spatial information from certain patterns to improve 3D object detection. Specifically, we create an inter-object relationship graph based on proposals in a frame via the graph generator to connect each proposal with its neighbor proposals. Afterward, the GNN module extracts edge features from the generated graph and iteratively refines proposal features with the captured edge features. Ultimately, we leverage the refined features as input to the detection head to obtain detection results. Our approach improves upon the baseline PV-RCNN on the KITTI validation set for the car class across easy, moderate, and hard difficulty levels by 0.82%, 0.74%, and 0.58%, respectively. Additionally, our method outperforms the baseline by more than 1% under the moderate and hard levels BEV AP on the test server. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2405.06782v1-abstract-full').style.display = 'none'; document.getElementById('2405.06782v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 10 May, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> May 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2405.01750">arXiv:2405.01750</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2405.01750">pdf</a>, <a href="https://arxiv.org/format/2405.01750">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Image and Video Processing">eess.IV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> PointCompress3D: A Point Cloud Compression Framework for Roadside LiDARs in Intelligent Transportation Systems </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Zimmer%2C+W">Walter Zimmer</a>, <a href="/search/cs?searchtype=author&amp;query=Pranamulia%2C+R">Ramandika Pranamulia</a>, <a href="/search/cs?searchtype=author&amp;query=Zhou%2C+X">Xingcheng Zhou</a>, <a href="/search/cs?searchtype=author&amp;query=Liu%2C+M">Mingyu Liu</a>, <a href="/search/cs?searchtype=author&amp;query=Knoll%2C+A+C">Alois C. Knoll</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2405.01750v2-abstract-short" style="display: inline;"> In the context of Intelligent Transportation Systems (ITS), efficient data compression is crucial for managing large-scale point cloud data acquired by roadside LiDAR sensors. The demand for efficient storage, streaming, and real-time object detection capabilities for point cloud data is substantial. This work introduces PointCompress3D, a novel point cloud compression framework tailored specifica&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2405.01750v2-abstract-full').style.display = 'inline'; document.getElementById('2405.01750v2-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2405.01750v2-abstract-full" style="display: none;"> In the context of Intelligent Transportation Systems (ITS), efficient data compression is crucial for managing large-scale point cloud data acquired by roadside LiDAR sensors. The demand for efficient storage, streaming, and real-time object detection capabilities for point cloud data is substantial. This work introduces PointCompress3D, a novel point cloud compression framework tailored specifically for roadside LiDARs. Our framework addresses the challenges of compressing high-resolution point clouds while maintaining accuracy and compatibility with roadside LiDAR sensors. We adapt, extend, integrate, and evaluate three cutting-edge compression methods using our real-world-based TUMTraf dataset family. We achieve a frame rate of 10 FPS while keeping compression sizes below 105 Kb, a reduction of 50 times, and maintaining object detection performance on par with the original data. In extensive experiments and ablation studies, we finally achieved a PSNR d2 of 94.46 and a BPP of 6.54 on our dataset. Future work includes the deployment on the live system. The code is available on our project website: https://pointcompress3d.github.io. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2405.01750v2-abstract-full').style.display = 'none'; document.getElementById('2405.01750v2-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 29 October, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 2 May, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> May 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2405.01677">arXiv:2405.01677</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2405.01677">pdf</a>, <a href="https://arxiv.org/format/2405.01677">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> </div> </div> <p class="title is-5 mathjax"> Balance Reward and Safety Optimization for Safe Reinforcement Learning: A Perspective of Gradient Manipulation </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Gu%2C+S">Shangding Gu</a>, <a href="/search/cs?searchtype=author&amp;query=Sel%2C+B">Bilgehan Sel</a>, <a href="/search/cs?searchtype=author&amp;query=Ding%2C+Y">Yuhao Ding</a>, <a href="/search/cs?searchtype=author&amp;query=Wang%2C+L">Lu Wang</a>, <a href="/search/cs?searchtype=author&amp;query=Lin%2C+Q">Qingwei Lin</a>, <a href="/search/cs?searchtype=author&amp;query=Jin%2C+M">Ming Jin</a>, <a href="/search/cs?searchtype=author&amp;query=Knoll%2C+A">Alois Knoll</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2405.01677v2-abstract-short" style="display: inline;"> Ensuring the safety of Reinforcement Learning (RL) is crucial for its deployment in real-world applications. Nevertheless, managing the trade-off between reward and safety during exploration presents a significant challenge. Improving reward performance through policy adjustments may adversely affect safety performance. In this study, we aim to address this conflicting relation by leveraging the t&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2405.01677v2-abstract-full').style.display = 'inline'; document.getElementById('2405.01677v2-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2405.01677v2-abstract-full" style="display: none;"> Ensuring the safety of Reinforcement Learning (RL) is crucial for its deployment in real-world applications. Nevertheless, managing the trade-off between reward and safety during exploration presents a significant challenge. Improving reward performance through policy adjustments may adversely affect safety performance. In this study, we aim to address this conflicting relation by leveraging the theory of gradient manipulation. Initially, we analyze the conflict between reward and safety gradients. Subsequently, we tackle the balance between reward and safety optimization by proposing a soft switching policy optimization method, for which we provide convergence analysis. Based on our theoretical examination, we provide a safe RL framework to overcome the aforementioned challenge, and we develop a Safety-MuJoCo Benchmark to assess the performance of safe RL algorithms. Finally, we evaluate the effectiveness of our method on the Safety-MuJoCo Benchmark and a popular safe RL benchmark, Omnisafe. Experimental results demonstrate that our algorithms outperform several state-of-the-art baselines in terms of balancing reward and safety optimization. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2405.01677v2-abstract-full').style.display = 'none'; document.getElementById('2405.01677v2-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 7 June, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 2 May, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> May 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2404.12683">arXiv:2404.12683</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2404.12683">pdf</a>, <a href="https://arxiv.org/format/2404.12683">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Robotics">cs.RO</span> </div> </div> <p class="title is-5 mathjax"> A Containerized Microservice Architecture for a ROS 2 Autonomous Driving Software: An End-to-End Latency Evaluation </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Betz%2C+T">Tobias Betz</a>, <a href="/search/cs?searchtype=author&amp;query=Wen%2C+L">Long Wen</a>, <a href="/search/cs?searchtype=author&amp;query=Pan%2C+F">Fengjunjie Pan</a>, <a href="/search/cs?searchtype=author&amp;query=Kaljavesi%2C+G">Gemb Kaljavesi</a>, <a href="/search/cs?searchtype=author&amp;query=Zuepke%2C+A">Alexander Zuepke</a>, <a href="/search/cs?searchtype=author&amp;query=Bastoni%2C+A">Andrea Bastoni</a>, <a href="/search/cs?searchtype=author&amp;query=Caccamo%2C+M">Marco Caccamo</a>, <a href="/search/cs?searchtype=author&amp;query=Knoll%2C+A">Alois Knoll</a>, <a href="/search/cs?searchtype=author&amp;query=Betz%2C+J">Johannes Betz</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2404.12683v1-abstract-short" style="display: inline;"> The automotive industry is transitioning from traditional ECU-based systems to software-defined vehicles. A central role of this revolution is played by containers, lightweight virtualization technologies that enable the flexible consolidation of complex software applications on a common hardware platform. Despite their widespread adoption, the impact of containerization on fundamental real-time m&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2404.12683v1-abstract-full').style.display = 'inline'; document.getElementById('2404.12683v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2404.12683v1-abstract-full" style="display: none;"> The automotive industry is transitioning from traditional ECU-based systems to software-defined vehicles. A central role of this revolution is played by containers, lightweight virtualization technologies that enable the flexible consolidation of complex software applications on a common hardware platform. Despite their widespread adoption, the impact of containerization on fundamental real-time metrics such as end-to-end latency, communication jitter, as well as memory and CPU utilization has remained virtually unexplored. This paper presents a microservice architecture for a real-world autonomous driving application where containers isolate each service. Our comprehensive evaluation shows the benefits in terms of end-to-end latency of such a solution even over standard bare-Linux deployments. Specifically, in the case of the presented microservice architecture, the mean end-to-end latency can be improved by 5-8 %. Also, the maximum latencies were significantly reduced using container deployment. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2404.12683v1-abstract-full').style.display = 'none'; document.getElementById('2404.12683v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 19 April, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> April 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2404.08844">arXiv:2404.08844</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2404.08844">pdf</a>, <a href="https://arxiv.org/format/2404.08844">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Robotics">cs.RO</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> </div> </div> <p class="title is-5 mathjax"> Multi-fingered Robotic Hand Grasping in Cluttered Environments through Hand-object Contact Semantic Mapping </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Zhang%2C+L">Lei Zhang</a>, <a href="/search/cs?searchtype=author&amp;query=Bai%2C+K">Kaixin Bai</a>, <a href="/search/cs?searchtype=author&amp;query=Huang%2C+G">Guowen Huang</a>, <a href="/search/cs?searchtype=author&amp;query=Bing%2C+Z">Zhenshan Bing</a>, <a href="/search/cs?searchtype=author&amp;query=Chen%2C+Z">Zhaopeng Chen</a>, <a href="/search/cs?searchtype=author&amp;query=Knoll%2C+A">Alois Knoll</a>, <a href="/search/cs?searchtype=author&amp;query=Zhang%2C+J">Jianwei Zhang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2404.08844v2-abstract-short" style="display: inline;"> The deep learning models has significantly advanced dexterous manipulation techniques for multi-fingered hand grasping. However, the contact information-guided grasping in cluttered environments remains largely underexplored. To address this gap, we have developed a method for generating multi-fingered hand grasp samples in cluttered settings through contact semantic map. We introduce a contact se&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2404.08844v2-abstract-full').style.display = 'inline'; document.getElementById('2404.08844v2-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2404.08844v2-abstract-full" style="display: none;"> The deep learning models has significantly advanced dexterous manipulation techniques for multi-fingered hand grasping. However, the contact information-guided grasping in cluttered environments remains largely underexplored. To address this gap, we have developed a method for generating multi-fingered hand grasp samples in cluttered settings through contact semantic map. We introduce a contact semantic conditional variational autoencoder network (CoSe-CVAE) for creating comprehensive contact semantic map from object point cloud. We utilize grasp detection method to estimate hand grasp poses from the contact semantic map. Finally, an unified grasp evaluation model is designed to assess grasp quality and collision probability, substantially improving the reliability of identifying optimal grasps in cluttered scenarios. Our grasp generation method has demonstrated remarkable success, outperforming state-of-the-art methods by at least 4.65% with 81.0% average grasping success rate in real-world single-object environment and 75.3% grasping success rate in cluttered scenes. We also proposed the multi-modal multi-fingered grasping dataset generation method. Our multi-fingered hand grasping dataset outperforms previous datasets in scene diversity, modality diversity. The dataset, code and supplementary materials can be found at https://sites.google.com/view/ffh-cluttered-grasping. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2404.08844v2-abstract-full').style.display = 'none'; document.getElementById('2404.08844v2-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 22 September, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 12 April, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> April 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">8 pages</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2404.05508">arXiv:2404.05508</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2404.05508">pdf</a>, <a href="https://arxiv.org/format/2404.05508">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Software Engineering">cs.SE</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> </div> </div> <p class="title is-5 mathjax"> Synergy of Large Language Model and Model Driven Engineering for Automated Development of Centralized Vehicular Systems </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Petrovic%2C+N">Nenad Petrovic</a>, <a href="/search/cs?searchtype=author&amp;query=Pan%2C+F">Fengjunjie Pan</a>, <a href="/search/cs?searchtype=author&amp;query=Lebioda%2C+K">Krzysztof Lebioda</a>, <a href="/search/cs?searchtype=author&amp;query=Zolfaghari%2C+V">Vahid Zolfaghari</a>, <a href="/search/cs?searchtype=author&amp;query=Kirchner%2C+S">Sven Kirchner</a>, <a href="/search/cs?searchtype=author&amp;query=Purschke%2C+N">Nils Purschke</a>, <a href="/search/cs?searchtype=author&amp;query=Khan%2C+M+A">Muhammad Aqib Khan</a>, <a href="/search/cs?searchtype=author&amp;query=Vorobev%2C+V">Viktor Vorobev</a>, <a href="/search/cs?searchtype=author&amp;query=Knoll%2C+A">Alois Knoll</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2404.05508v1-abstract-short" style="display: inline;"> We present a prototype of a tool leveraging the synergy of model driven engineering (MDE) and Large Language Models (LLM) for the purpose of software development process automation in the automotive industry. In this approach, the user-provided input is free form textual requirements, which are first translated to Ecore model instance representation using an LLM, which is afterwards checked for co&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2404.05508v1-abstract-full').style.display = 'inline'; document.getElementById('2404.05508v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2404.05508v1-abstract-full" style="display: none;"> We present a prototype of a tool leveraging the synergy of model driven engineering (MDE) and Large Language Models (LLM) for the purpose of software development process automation in the automotive industry. In this approach, the user-provided input is free form textual requirements, which are first translated to Ecore model instance representation using an LLM, which is afterwards checked for consistency using Object Constraint Language (OCL) rules. After successful consistency check, the model instance is fed as input to another LLM for the purpose of code generation. The generated code is evaluated in a simulated environment using CARLA simulator connected to an example centralized vehicle architecture, in an emergency brake scenario. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2404.05508v1-abstract-full').style.display = 'none'; document.getElementById('2404.05508v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 8 April, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> April 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Report number:</span> TUM-I24109 <span class="has-text-black-bis has-text-weight-semibold">ACM Class:</span> D.2.1; D.2.2; D.2.4; I.2.7; I.2.2; I.7.0 </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2404.05423">arXiv:2404.05423</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2404.05423">pdf</a>, <a href="https://arxiv.org/format/2404.05423">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Robotics">cs.RO</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> </div> </div> <p class="title is-5 mathjax"> Residual Chain Prediction for Autonomous Driving Path Planning </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Zhou%2C+L">Liguo Zhou</a>, <a href="/search/cs?searchtype=author&amp;query=Zhou%2C+Y">Yirui Zhou</a>, <a href="/search/cs?searchtype=author&amp;query=Liu%2C+H">Huaming Liu</a>, <a href="/search/cs?searchtype=author&amp;query=Knoll%2C+A">Alois Knoll</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2404.05423v1-abstract-short" style="display: inline;"> In the rapidly evolving field of autonomous driving systems, the refinement of path planning algorithms is paramount for navigating vehicles through dynamic environments, particularly in complex urban scenarios. Traditional path planning algorithms, which are heavily reliant on static rules and manually defined parameters, often fall short in such contexts, highlighting the need for more adaptive,&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2404.05423v1-abstract-full').style.display = 'inline'; document.getElementById('2404.05423v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2404.05423v1-abstract-full" style="display: none;"> In the rapidly evolving field of autonomous driving systems, the refinement of path planning algorithms is paramount for navigating vehicles through dynamic environments, particularly in complex urban scenarios. Traditional path planning algorithms, which are heavily reliant on static rules and manually defined parameters, often fall short in such contexts, highlighting the need for more adaptive, learning-based approaches. Among these, behavior cloning emerges as a noteworthy strategy for its simplicity and efficiency, especially within the realm of end-to-end path planning. However, behavior cloning faces challenges, such as covariate shift when employing traditional Manhattan distance as the metric. Addressing this, our study introduces the novel concept of Residual Chain Loss. Residual Chain Loss dynamically adjusts the loss calculation process to enhance the temporal dependency and accuracy of predicted path points, significantly improving the model&#39;s performance without additional computational overhead. Through testing on the nuScenes dataset, we underscore the method&#39;s substantial advancements in addressing covariate shift, facilitating dynamic loss adjustments, and ensuring seamless integration with end-to-end path planning frameworks. Our findings highlight the potential of Residual Chain Loss to revolutionize planning component of autonomous driving systems, marking a significant step forward in the quest for level 5 autonomous driving system. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2404.05423v1-abstract-full').style.display = 'none'; document.getElementById('2404.05423v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 8 April, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> April 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">6 pages, 2 figures</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2403.19916">arXiv:2403.19916</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2403.19916">pdf</a>, <a href="https://arxiv.org/format/2403.19916">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Robotics">cs.RO</span> </div> <div class="is-inline-block" style="margin-left: 0.5rem"> <div class="tags has-addons"> <span class="tag is-dark is-size-7">doi</span> <span class="tag is-light is-size-7"><a class="" href="https://doi.org/10.1016/j.inffus.2024.102379">10.1016/j.inffus.2024.102379 <i class="fa fa-external-link" aria-hidden="true"></i></a></span> </div> </div> </div> <p class="title is-5 mathjax"> Fusion Dynamical Systems with Machine Learning in Imitation Learning: A Comprehensive Overview </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Hu%2C+Y">Yingbai Hu</a>, <a href="/search/cs?searchtype=author&amp;query=Abu-Dakka%2C+F+J">Fares J. Abu-Dakka</a>, <a href="/search/cs?searchtype=author&amp;query=Chen%2C+F">Fei Chen</a>, <a href="/search/cs?searchtype=author&amp;query=Luo%2C+X">Xiao Luo</a>, <a href="/search/cs?searchtype=author&amp;query=Li%2C+Z">Zheng Li</a>, <a href="/search/cs?searchtype=author&amp;query=Knoll%2C+A">Alois Knoll</a>, <a href="/search/cs?searchtype=author&amp;query=Ding%2C+W">Weiping Ding</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2403.19916v1-abstract-short" style="display: inline;"> Imitation Learning (IL), also referred to as Learning from Demonstration (LfD), holds significant promise for capturing expert motor skills through efficient imitation, facilitating adept navigation of complex scenarios. A persistent challenge in IL lies in extending generalization from historical demonstrations, enabling the acquisition of new skills without re-teaching. Dynamical system-based IL&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2403.19916v1-abstract-full').style.display = 'inline'; document.getElementById('2403.19916v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2403.19916v1-abstract-full" style="display: none;"> Imitation Learning (IL), also referred to as Learning from Demonstration (LfD), holds significant promise for capturing expert motor skills through efficient imitation, facilitating adept navigation of complex scenarios. A persistent challenge in IL lies in extending generalization from historical demonstrations, enabling the acquisition of new skills without re-teaching. Dynamical system-based IL (DSIL) emerges as a significant subset of IL methodologies, offering the ability to learn trajectories via movement primitives and policy learning based on experiential abstraction. This paper emphasizes the fusion of theoretical paradigms, integrating control theory principles inherent in dynamical systems into IL. This integration notably enhances robustness, adaptability, and convergence in the face of novel scenarios. This survey aims to present a comprehensive overview of DSIL methods, spanning from classical approaches to recent advanced approaches. We categorize DSIL into autonomous dynamical systems and non-autonomous dynamical systems, surveying traditional IL methods with low-dimensional input and advanced deep IL methods with high-dimensional input. Additionally, we present and analyze three main stability methods for IL: Lyapunov stability, contraction theory, and diffeomorphism mapping. Our exploration also extends to popular policy improvement methods for DSIL, encompassing reinforcement learning, deep reinforcement learning, and evolutionary strategies. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2403.19916v1-abstract-full').style.display = 'none'; document.getElementById('2403.19916v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 28 March, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> March 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2403.15474">arXiv:2403.15474</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2403.15474">pdf</a>, <a href="https://arxiv.org/format/2403.15474">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Robotics">cs.RO</span> </div> <div class="is-inline-block" style="margin-left: 0.5rem"> <div class="tags has-addons"> <span class="tag is-dark is-size-7">doi</span> <span class="tag is-light is-size-7"><a class="" href="https://doi.org/10.1109/IROS58592.2024.10801740">10.1109/IROS58592.2024.10801740 <i class="fa fa-external-link" aria-hidden="true"></i></a></span> </div> </div> </div> <p class="title is-5 mathjax"> EC-IoU: Orienting Safety for Object Detectors via Ego-Centric Intersection-over-Union </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Liao%2C+B+H">Brian Hsuan-Cheng Liao</a>, <a href="/search/cs?searchtype=author&amp;query=Cheng%2C+C">Chih-Hong Cheng</a>, <a href="/search/cs?searchtype=author&amp;query=Esen%2C+H">Hasan Esen</a>, <a href="/search/cs?searchtype=author&amp;query=Knoll%2C+A">Alois Knoll</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2403.15474v2-abstract-short" style="display: inline;"> This paper presents Ego-Centric Intersection-over-Union (EC-IoU), addressing the limitation of the standard IoU measure in characterizing safety-related performance for object detectors in navigating contexts. Concretely, we propose a weighting mechanism to refine IoU, allowing it to assign a higher score to a prediction that covers closer points of a ground-truth object from the ego agent&#39;s persp&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2403.15474v2-abstract-full').style.display = 'inline'; document.getElementById('2403.15474v2-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2403.15474v2-abstract-full" style="display: none;"> This paper presents Ego-Centric Intersection-over-Union (EC-IoU), addressing the limitation of the standard IoU measure in characterizing safety-related performance for object detectors in navigating contexts. Concretely, we propose a weighting mechanism to refine IoU, allowing it to assign a higher score to a prediction that covers closer points of a ground-truth object from the ego agent&#39;s perspective. The proposed EC-IoU measure can be used in typical evaluation processes to select object detectors with better safety-related performance for downstream tasks. It can also be integrated into common loss functions for model fine-tuning. While geared towards safety, our experiment with the KITTI dataset demonstrates the performance of a model trained on EC-IoU can be better than that of a variant trained on IoU in terms of mean Average Precision as well. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2403.15474v2-abstract-full').style.display = 'none'; document.getElementById('2403.15474v2-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 2 January, 2025; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 20 March, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> March 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">8 pages (IEEE double column format), 7 figures, 2 tables</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2403.14614">arXiv:2403.14614</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2403.14614">pdf</a>, <a href="https://arxiv.org/format/2403.14614">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> AdaIR: Adaptive All-in-One Image Restoration via Frequency Mining and Modulation </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Cui%2C+Y">Yuning Cui</a>, <a href="/search/cs?searchtype=author&amp;query=Zamir%2C+S+W">Syed Waqas Zamir</a>, <a href="/search/cs?searchtype=author&amp;query=Khan%2C+S">Salman Khan</a>, <a href="/search/cs?searchtype=author&amp;query=Knoll%2C+A">Alois Knoll</a>, <a href="/search/cs?searchtype=author&amp;query=Shah%2C+M">Mubarak Shah</a>, <a href="/search/cs?searchtype=author&amp;query=Khan%2C+F+S">Fahad Shahbaz Khan</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2403.14614v1-abstract-short" style="display: inline;"> In the image acquisition process, various forms of degradation, including noise, haze, and rain, are frequently introduced. These degradations typically arise from the inherent limitations of cameras or unfavorable ambient conditions. To recover clean images from degraded versions, numerous specialized restoration methods have been developed, each targeting a specific type of degradation. Recently&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2403.14614v1-abstract-full').style.display = 'inline'; document.getElementById('2403.14614v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2403.14614v1-abstract-full" style="display: none;"> In the image acquisition process, various forms of degradation, including noise, haze, and rain, are frequently introduced. These degradations typically arise from the inherent limitations of cameras or unfavorable ambient conditions. To recover clean images from degraded versions, numerous specialized restoration methods have been developed, each targeting a specific type of degradation. Recently, all-in-one algorithms have garnered significant attention by addressing different types of degradations within a single model without requiring prior information of the input degradation type. However, these methods purely operate in the spatial domain and do not delve into the distinct frequency variations inherent to different degradation types. To address this gap, we propose an adaptive all-in-one image restoration network based on frequency mining and modulation. Our approach is motivated by the observation that different degradation types impact the image content on different frequency subbands, thereby requiring different treatments for each restoration task. Specifically, we first mine low- and high-frequency information from the input features, guided by the adaptively decoupled spectra of the degraded image. The extracted features are then modulated by a bidirectional operator to facilitate interactions between different frequency components. Finally, the modulated features are merged into the original input for a progressively guided restoration. With this approach, the model achieves adaptive reconstruction by accentuating the informative frequency subbands according to different input degradations. Extensive experiments demonstrate that the proposed method achieves state-of-the-art performance on different image restoration tasks, including denoising, dehazing, deraining, motion deblurring, and low-light image enhancement. Our code is available at https://github.com/c-yn/AdaIR. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2403.14614v1-abstract-full').style.display = 'none'; document.getElementById('2403.14614v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 21 March, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> March 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">28 pages,15 figures</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2403.14460">arXiv:2403.14460</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2403.14460">pdf</a>, <a href="https://arxiv.org/format/2403.14460">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Software Engineering">cs.SE</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> </div> </div> <p class="title is-5 mathjax"> Towards Single-System Illusion in Software-Defined Vehicles -- Automated, AI-Powered Workflow </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Lebioda%2C+K">Krzysztof Lebioda</a>, <a href="/search/cs?searchtype=author&amp;query=Vorobev%2C+V">Viktor Vorobev</a>, <a href="/search/cs?searchtype=author&amp;query=Petrovic%2C+N">Nenad Petrovic</a>, <a href="/search/cs?searchtype=author&amp;query=Pan%2C+F">Fengjunjie Pan</a>, <a href="/search/cs?searchtype=author&amp;query=Zolfaghari%2C+V">Vahid Zolfaghari</a>, <a href="/search/cs?searchtype=author&amp;query=Knoll%2C+A">Alois Knoll</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2403.14460v1-abstract-short" style="display: inline;"> We propose a novel model- and feature-based approach to development of vehicle software systems, where the end architecture is not explicitly defined. Instead, it emerges from an iterative process of search and optimization given certain constraints, requirements and hardware architecture, while retaining the property of single-system illusion, where applications run in a logically uniform environ&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2403.14460v1-abstract-full').style.display = 'inline'; document.getElementById('2403.14460v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2403.14460v1-abstract-full" style="display: none;"> We propose a novel model- and feature-based approach to development of vehicle software systems, where the end architecture is not explicitly defined. Instead, it emerges from an iterative process of search and optimization given certain constraints, requirements and hardware architecture, while retaining the property of single-system illusion, where applications run in a logically uniform environment. One of the key points of the presented approach is the inclusion of modern generative AI, specifically Large Language Models (LLMs), in the loop. With the recent advances in the field, we expect that the LLMs will be able to assist in processing of requirements, generation of formal system models, as well as generation of software deployment specification and test code. The resulting pipeline is automated to a large extent, with feedback being generated at each step. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2403.14460v1-abstract-full').style.display = 'none'; document.getElementById('2403.14460v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 21 March, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> March 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Report number:</span> TUM-I24108 <span class="has-text-black-bis has-text-weight-semibold">ACM Class:</span> D.2.1; D.2.2; D.2.4; I.2.7; I.2.2; I.7.0 </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2403.12193">arXiv:2403.12193</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2403.12193">pdf</a>, <a href="https://arxiv.org/format/2403.12193">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Robotics">cs.RO</span> </div> </div> <p class="title is-5 mathjax"> Continual Domain Randomization </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Josifovski%2C+J">Josip Josifovski</a>, <a href="/search/cs?searchtype=author&amp;query=Auddy%2C+S">Sayantan Auddy</a>, <a href="/search/cs?searchtype=author&amp;query=Malmir%2C+M">Mohammadhossein Malmir</a>, <a href="/search/cs?searchtype=author&amp;query=Piater%2C+J">Justus Piater</a>, <a href="/search/cs?searchtype=author&amp;query=Knoll%2C+A">Alois Knoll</a>, <a href="/search/cs?searchtype=author&amp;query=Navarro-Guerrero%2C+N">Nicol谩s Navarro-Guerrero</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2403.12193v2-abstract-short" style="display: inline;"> Domain Randomization (DR) is commonly used for sim2real transfer of reinforcement learning (RL) policies in robotics. Most DR approaches require a simulator with a fixed set of tunable parameters from the start of the training, from which the parameters are randomized simultaneously to train a robust model for use in the real world. However, the combined randomization of many parameters increases&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2403.12193v2-abstract-full').style.display = 'inline'; document.getElementById('2403.12193v2-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2403.12193v2-abstract-full" style="display: none;"> Domain Randomization (DR) is commonly used for sim2real transfer of reinforcement learning (RL) policies in robotics. Most DR approaches require a simulator with a fixed set of tunable parameters from the start of the training, from which the parameters are randomized simultaneously to train a robust model for use in the real world. However, the combined randomization of many parameters increases the task difficulty and might result in sub-optimal policies. To address this problem and to provide a more flexible training process, we propose Continual Domain Randomization (CDR) for RL that combines domain randomization with continual learning to enable sequential training in simulation on a subset of randomization parameters at a time. Starting from a model trained in a non-randomized simulation where the task is easier to solve, the model is trained on a sequence of randomizations, and continual learning is employed to remember the effects of previous randomizations. Our robotic reaching and grasping tasks experiments show that the model trained in this fashion learns effectively in simulation and performs robustly on the real robot while matching or outperforming baselines that employ combined randomization or sequential randomization without continual learning. Our code and videos are available at https://continual-dr.github.io/. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2403.12193v2-abstract-full').style.display = 'none'; document.getElementById('2403.12193v2-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 27 August, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 18 March, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> March 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Accepted at IROS 2024. Equal contribution from first two authors</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2403.11788">arXiv:2403.11788</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2403.11788">pdf</a>, <a href="https://arxiv.org/format/2403.11788">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Robotics">cs.RO</span> </div> </div> <p class="title is-5 mathjax"> Locomotion Generation for a Rat Robot based on Environmental Changes via Reinforcement Learning </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Shan%2C+X">Xinhui Shan</a>, <a href="/search/cs?searchtype=author&amp;query=Huang%2C+Y">Yuhong Huang</a>, <a href="/search/cs?searchtype=author&amp;query=Bing%2C+Z">Zhenshan Bing</a>, <a href="/search/cs?searchtype=author&amp;query=Zhang%2C+Z">Zitao Zhang</a>, <a href="/search/cs?searchtype=author&amp;query=Yao%2C+X">Xiangtong Yao</a>, <a href="/search/cs?searchtype=author&amp;query=Huang%2C+K">Kai Huang</a>, <a href="/search/cs?searchtype=author&amp;query=Knoll%2C+A">Alois Knoll</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2403.11788v3-abstract-short" style="display: inline;"> This research focuses on developing reinforcement learning approaches for the locomotion generation of small-size quadruped robots. The rat robot NeRmo is employed as the experimental platform. Due to the constrained volume, small-size quadruped robots typically possess fewer and weaker sensors, resulting in difficulty in accurately perceiving and responding to environmental changes. In this conte&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2403.11788v3-abstract-full').style.display = 'inline'; document.getElementById('2403.11788v3-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2403.11788v3-abstract-full" style="display: none;"> This research focuses on developing reinforcement learning approaches for the locomotion generation of small-size quadruped robots. The rat robot NeRmo is employed as the experimental platform. Due to the constrained volume, small-size quadruped robots typically possess fewer and weaker sensors, resulting in difficulty in accurately perceiving and responding to environmental changes. In this context, insufficient and imprecise feedback data from sensors makes it difficult to generate adaptive locomotion based on reinforcement learning. To overcome these challenges, this paper proposes a novel reinforcement learning approach that focuses on extracting effective perceptual information to enhance the environmental adaptability of small-size quadruped robots. According to the frequency of a robot&#39;s gait stride, key information of sensor data is analyzed utilizing sinusoidal functions derived from Fourier transform results. Additionally, a multifunctional reward mechanism is proposed to generate adaptive locomotion in different tasks. Extensive simulations are conducted to assess the effectiveness of the proposed reinforcement learning approach in generating rat robot locomotion in various environments. The experiment results illustrate the capability of the proposed approach to maintain stable locomotion of a rat robot across different terrains, including ramps, stairs, and spiral stairs. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2403.11788v3-abstract-full').style.display = 'none'; document.getElementById('2403.11788v3-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 14 April, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 18 March, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> March 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2403.08694">arXiv:2403.08694</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2403.08694">pdf</a>, <a href="https://arxiv.org/format/2403.08694">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> </div> </div> <p class="title is-5 mathjax"> TeaMs-RL: Teaching LLMs to Generate Better Instruction Datasets via Reinforcement Learning </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Gu%2C+S">Shangding Gu</a>, <a href="/search/cs?searchtype=author&amp;query=Knoll%2C+A">Alois Knoll</a>, <a href="/search/cs?searchtype=author&amp;query=Jin%2C+M">Ming Jin</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2403.08694v3-abstract-short" style="display: inline;"> The development of Large Language Models (LLMs) often confronts challenges stemming from the heavy reliance on human annotators in the reinforcement learning with human feedback (RLHF) framework, or the frequent and costly external queries tied to the self-instruct paradigm. In this work, we pivot to Reinforcement Learning (RL) -- but with a twist. Diverging from the typical RLHF, which refines LL&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2403.08694v3-abstract-full').style.display = 'inline'; document.getElementById('2403.08694v3-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2403.08694v3-abstract-full" style="display: none;"> The development of Large Language Models (LLMs) often confronts challenges stemming from the heavy reliance on human annotators in the reinforcement learning with human feedback (RLHF) framework, or the frequent and costly external queries tied to the self-instruct paradigm. In this work, we pivot to Reinforcement Learning (RL) -- but with a twist. Diverging from the typical RLHF, which refines LLMs following instruction data training, we use RL to directly generate the foundational instruction dataset that alone suffices for fine-tuning. Our method, TeaMs-RL, uses a suite of textual operations and rules, prioritizing the diversification of training datasets. It facilitates the generation of high-quality data without excessive reliance on external advanced models, paving the way for a single fine-tuning step and negating the need for subsequent RLHF stages. Our findings highlight key advantages of our approach: reduced need for human involvement and fewer model queries (only $5.73\%$ of the strong baseline&#39;s total), along with enhanced capabilities of LLMs in crafting and comprehending complex instructions compared to strong baselines, and substantially improved model privacy protection. Code is available at the link: https://github.com/SafeRL-Lab/TeaMs-RL <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2403.08694v3-abstract-full').style.display = 'none'; document.getElementById('2403.08694v3-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 19 August, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 13 March, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> March 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2403.04149">arXiv:2403.04149</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2403.04149">pdf</a>, <a href="https://arxiv.org/format/2403.04149">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> MAP: MAsk-Pruning for Source-Free Model Intellectual Property Protection </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Peng%2C+B">Boyang Peng</a>, <a href="/search/cs?searchtype=author&amp;query=Qu%2C+S">Sanqing Qu</a>, <a href="/search/cs?searchtype=author&amp;query=Wu%2C+Y">Yong Wu</a>, <a href="/search/cs?searchtype=author&amp;query=Zou%2C+T">Tianpei Zou</a>, <a href="/search/cs?searchtype=author&amp;query=He%2C+L">Lianghua He</a>, <a href="/search/cs?searchtype=author&amp;query=Knoll%2C+A">Alois Knoll</a>, <a href="/search/cs?searchtype=author&amp;query=Chen%2C+G">Guang Chen</a>, <a href="/search/cs?searchtype=author&amp;query=jiang%2C+c">changjun jiang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2403.04149v1-abstract-short" style="display: inline;"> Deep learning has achieved remarkable progress in various applications, heightening the importance of safeguarding the intellectual property (IP) of well-trained models. It entails not only authorizing usage but also ensuring the deployment of models in authorized data domains, i.e., making models exclusive to certain target domains. Previous methods necessitate concurrent access to source trainin&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2403.04149v1-abstract-full').style.display = 'inline'; document.getElementById('2403.04149v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2403.04149v1-abstract-full" style="display: none;"> Deep learning has achieved remarkable progress in various applications, heightening the importance of safeguarding the intellectual property (IP) of well-trained models. It entails not only authorizing usage but also ensuring the deployment of models in authorized data domains, i.e., making models exclusive to certain target domains. Previous methods necessitate concurrent access to source training data and target unauthorized data when performing IP protection, making them risky and inefficient for decentralized private data. In this paper, we target a practical setting where only a well-trained source model is available and investigate how we can realize IP protection. To achieve this, we propose a novel MAsk Pruning (MAP) framework. MAP stems from an intuitive hypothesis, i.e., there are target-related parameters in a well-trained model, locating and pruning them is the key to IP protection. Technically, MAP freezes the source model and learns a target-specific binary mask to prevent unauthorized data usage while minimizing performance degradation on authorized data. Moreover, we introduce a new metric aimed at achieving a better balance between source and target performance degradation. To verify the effectiveness and versatility, we have evaluated MAP in a variety of scenarios, including vanilla source-available, practical source-free, and challenging data-free. Extensive experiments indicate that MAP yields new state-of-the-art performance. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2403.04149v1-abstract-full').style.display = 'none'; document.getElementById('2403.04149v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 6 March, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> March 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Accepted to CVPR 2024</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2403.03421">arXiv:2403.03421</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2403.03421">pdf</a>, <a href="https://arxiv.org/format/2403.03421">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> LEAD: Learning Decomposition for Source-free Universal Domain Adaptation </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Qu%2C+S">Sanqing Qu</a>, <a href="/search/cs?searchtype=author&amp;query=Zou%2C+T">Tianpei Zou</a>, <a href="/search/cs?searchtype=author&amp;query=He%2C+L">Lianghua He</a>, <a href="/search/cs?searchtype=author&amp;query=R%C3%B6hrbein%2C+F">Florian R枚hrbein</a>, <a href="/search/cs?searchtype=author&amp;query=Knoll%2C+A">Alois Knoll</a>, <a href="/search/cs?searchtype=author&amp;query=Chen%2C+G">Guang Chen</a>, <a href="/search/cs?searchtype=author&amp;query=Jiang%2C+C">Changjun Jiang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2403.03421v1-abstract-short" style="display: inline;"> Universal Domain Adaptation (UniDA) targets knowledge transfer in the presence of both covariate and label shifts. Recently, Source-free Universal Domain Adaptation (SF-UniDA) has emerged to achieve UniDA without access to source data, which tends to be more practical due to data protection policies. The main challenge lies in determining whether covariate-shifted samples belong to target-private&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2403.03421v1-abstract-full').style.display = 'inline'; document.getElementById('2403.03421v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2403.03421v1-abstract-full" style="display: none;"> Universal Domain Adaptation (UniDA) targets knowledge transfer in the presence of both covariate and label shifts. Recently, Source-free Universal Domain Adaptation (SF-UniDA) has emerged to achieve UniDA without access to source data, which tends to be more practical due to data protection policies. The main challenge lies in determining whether covariate-shifted samples belong to target-private unknown categories. Existing methods tackle this either through hand-crafted thresholding or by developing time-consuming iterative clustering strategies. In this paper, we propose a new idea of LEArning Decomposition (LEAD), which decouples features into source-known and -unknown components to identify target-private data. Technically, LEAD initially leverages the orthogonal decomposition analysis for feature decomposition. Then, LEAD builds instance-level decision boundaries to adaptively identify target-private data. Extensive experiments across various UniDA scenarios have demonstrated the effectiveness and superiority of LEAD. Notably, in the OPDA scenario on VisDA dataset, LEAD outperforms GLC by 3.5% overall H-score and reduces 75% time to derive pseudo-labeling decision boundaries. Besides, LEAD is also appealing in that it is complementary to most existing methods. The code is available at https://github.com/ispc-lab/LEAD. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2403.03421v1-abstract-full').style.display = 'none'; document.getElementById('2403.03421v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 5 March, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> March 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">To appear in CVPR 2024</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2403.01532">arXiv:2403.01532</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2403.01532">pdf</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Plasma Physics">physics.plasm-ph</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computational Physics">physics.comp-ph</span> </div> </div> <p class="title is-5 mathjax"> Data-driven local operator finding for reduced-order modelling of plasma systems: II. Application to parametric dynamics </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Faraji%2C+F">Farbod Faraji</a>, <a href="/search/cs?searchtype=author&amp;query=Reza%2C+M">Maryam Reza</a>, <a href="/search/cs?searchtype=author&amp;query=Knoll%2C+A">Aaron Knoll</a>, <a href="/search/cs?searchtype=author&amp;query=Kutz%2C+J+N">J. Nathan Kutz</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2403.01532v1-abstract-short" style="display: inline;"> Real-world systems often exhibit dynamics influenced by various parameters, either inherent or externally controllable, necessitating models capable of reliably capturing these parametric behaviors. Plasma technologies exemplify such systems. For example, phenomena governing global dynamics in Hall thrusters (a spacecraft propulsion technology) vary with various parameters, such as the &#34;self-susta&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2403.01532v1-abstract-full').style.display = 'inline'; document.getElementById('2403.01532v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2403.01532v1-abstract-full" style="display: none;"> Real-world systems often exhibit dynamics influenced by various parameters, either inherent or externally controllable, necessitating models capable of reliably capturing these parametric behaviors. Plasma technologies exemplify such systems. For example, phenomena governing global dynamics in Hall thrusters (a spacecraft propulsion technology) vary with various parameters, such as the &#34;self-sustained electric field&#34;. In this Part II, following on the introduction of our novel data-driven local operator finding algorithm, Phi Method, in Part I, we showcase the method&#39;s effectiveness in learning parametric dynamics to predict system behavior across unseen parameter spaces. We present two adaptations: the &#34;parametric Phi Method&#34; and the &#34;ensemble Phi Method&#34;, which are demonstrated through 2D fluid-flow-past-a-cylinder and 1D Hall-thruster-plasma-discharge problems. Comparative evaluation against parametric OPT-DMD in the fluid case demonstrates superior predictive performance of the parametric Phi Method. Across both test cases, parametric and ensemble Phi Method reliably recover governing parametric PDEs and offer accurate predictions over test parameters. Ensemble ROM analysis underscores Phi Method&#39;s robust learning of dominant dynamic coefficients with high confidence. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2403.01532v1-abstract-full').style.display = 'none'; document.getElementById('2403.01532v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 3 March, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> March 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">24 pages, 17 figures</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2403.01523">arXiv:2403.01523</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2403.01523">pdf</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Plasma Physics">physics.plasm-ph</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computational Physics">physics.comp-ph</span> </div> </div> <p class="title is-5 mathjax"> Data-driven local operator finding for reduced-order modelling of plasma systems: I. Concept and verifications </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Faraji%2C+F">Farbod Faraji</a>, <a href="/search/cs?searchtype=author&amp;query=Reza%2C+M">Maryam Reza</a>, <a href="/search/cs?searchtype=author&amp;query=Knoll%2C+A">Aaron Knoll</a>, <a href="/search/cs?searchtype=author&amp;query=Kutz%2C+J+N">J. Nathan Kutz</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2403.01523v1-abstract-short" style="display: inline;"> Reduced-order plasma models that can efficiently predict plasma behavior across various settings and configurations are highly sought after yet elusive. The demand for such models has surged in the past decade due to their potential to facilitate scientific research and expedite the development of plasma technologies. In line with the advancements in computational power and data-driven methods, we&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2403.01523v1-abstract-full').style.display = 'inline'; document.getElementById('2403.01523v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2403.01523v1-abstract-full" style="display: none;"> Reduced-order plasma models that can efficiently predict plasma behavior across various settings and configurations are highly sought after yet elusive. The demand for such models has surged in the past decade due to their potential to facilitate scientific research and expedite the development of plasma technologies. In line with the advancements in computational power and data-driven methods, we introduce the &#34;Phi Method&#34; in this two-part article. Part I presents this novel algorithm, which employs constrained regression on a candidate term library informed by numerical discretization schemes to discover discretized systems of differential equations. We demonstrate Phi Method&#39;s efficacy in deriving reliable and robust reduced-order models (ROMs) for three test cases: the Lorenz attractor, flow past a cylinder, and a 1D Hall-thruster-representative plasma. Part II will delve into the method&#39;s application for parametric dynamics discovery. Our results show that ROMs derived from the Phi Method provide remarkably accurate predictions of systems&#39; behavior, whether derived from steady-state or transient-state data. This underscores the method&#39;s potential for transforming plasma system modeling. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2403.01523v1-abstract-full').style.display = 'none'; document.getElementById('2403.01523v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 3 March, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> March 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">27 pages, 18 figures</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2403.01316">arXiv:2403.01316</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2403.01316">pdf</a>, <a href="https://arxiv.org/format/2403.01316">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> TUMTraf V2X Cooperative Perception Dataset </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Zimmer%2C+W">Walter Zimmer</a>, <a href="/search/cs?searchtype=author&amp;query=Wardana%2C+G+A">Gerhard Arya Wardana</a>, <a href="/search/cs?searchtype=author&amp;query=Sritharan%2C+S">Suren Sritharan</a>, <a href="/search/cs?searchtype=author&amp;query=Zhou%2C+X">Xingcheng Zhou</a>, <a href="/search/cs?searchtype=author&amp;query=Song%2C+R">Rui Song</a>, <a href="/search/cs?searchtype=author&amp;query=Knoll%2C+A+C">Alois C. Knoll</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2403.01316v1-abstract-short" style="display: inline;"> Cooperative perception offers several benefits for enhancing the capabilities of autonomous vehicles and improving road safety. Using roadside sensors in addition to onboard sensors increases reliability and extends the sensor range. External sensors offer higher situational awareness for automated vehicles and prevent occlusions. We propose CoopDet3D, a cooperative multi-modal fusion model, and T&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2403.01316v1-abstract-full').style.display = 'inline'; document.getElementById('2403.01316v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2403.01316v1-abstract-full" style="display: none;"> Cooperative perception offers several benefits for enhancing the capabilities of autonomous vehicles and improving road safety. Using roadside sensors in addition to onboard sensors increases reliability and extends the sensor range. External sensors offer higher situational awareness for automated vehicles and prevent occlusions. We propose CoopDet3D, a cooperative multi-modal fusion model, and TUMTraf-V2X, a perception dataset, for the cooperative 3D object detection and tracking task. Our dataset contains 2,000 labeled point clouds and 5,000 labeled images from five roadside and four onboard sensors. It includes 30k 3D boxes with track IDs and precise GPS and IMU data. We labeled eight categories and covered occlusion scenarios with challenging driving maneuvers, like traffic violations, near-miss events, overtaking, and U-turns. Through multiple experiments, we show that our CoopDet3D camera-LiDAR fusion model achieves an increase of +14.36 3D mAP compared to a vehicle camera-LiDAR fusion model. Finally, we make our dataset, model, labeling tool, and dev-kit publicly available on our website: https://tum-traffic-dataset.github.io/tumtraf-v2x. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2403.01316v1-abstract-full').style.display = 'none'; document.getElementById('2403.01316v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 2 March, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> March 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2403.00944">arXiv:2403.00944</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2403.00944">pdf</a>, <a href="https://arxiv.org/format/2403.00944">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Robotics">cs.RO</span> </div> </div> <p class="title is-5 mathjax"> Optimizing Dynamic Balance in a Rat Robot via the Lateral Flexion of a Soft Actuated Spine </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Huang%2C+Y">Yuhong Huang</a>, <a href="/search/cs?searchtype=author&amp;query=Bing%2C+Z">Zhenshan Bing</a>, <a href="/search/cs?searchtype=author&amp;query=Zhang%2C+Z">Zitao Zhang</a>, <a href="/search/cs?searchtype=author&amp;query=Zhuang%2C+G">Genghang Zhuang</a>, <a href="/search/cs?searchtype=author&amp;query=Huang%2C+K">Kai Huang</a>, <a href="/search/cs?searchtype=author&amp;query=Knoll%2C+A">Alois Knoll</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2403.00944v1-abstract-short" style="display: inline;"> Balancing oneself using the spine is a physiological alignment of the body posture in the most efficient manner by the muscular forces for mammals. For this reason, we can see many disabled quadruped animals can still stand or walk even with three limbs. This paper investigates the optimization of dynamic balance during trot gait based on the spatial relationship between the center of mass (CoM) a&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2403.00944v1-abstract-full').style.display = 'inline'; document.getElementById('2403.00944v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2403.00944v1-abstract-full" style="display: none;"> Balancing oneself using the spine is a physiological alignment of the body posture in the most efficient manner by the muscular forces for mammals. For this reason, we can see many disabled quadruped animals can still stand or walk even with three limbs. This paper investigates the optimization of dynamic balance during trot gait based on the spatial relationship between the center of mass (CoM) and support area influenced by spinal flexion. During trotting, the robot balance is significantly influenced by the distance of the CoM to the support area formed by diagonal footholds. In this context, lateral spinal flexion, which is able to modify the position of footholds, holds promise for optimizing balance during trotting. This paper explores this phenomenon using a rat robot equipped with a soft actuated spine. Based on the lateral flexion of the spine, we establish a kinematic model to quantify the impact of spinal flexion on robot balance during trot gait. Subsequently, we develop an optimized controller for spinal flexion, designed to enhance balance without altering the leg locomotion. The effectiveness of our proposed controller is evaluated through extensive simulations and physical experiments conducted on a rat robot. Compared to both a non-spine based trot gait controller and a trot gait controller with lateral spinal flexion, our proposed optimized controller effectively improves the dynamic balance of the robot and retains the desired locomotion during trotting. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2403.00944v1-abstract-full').style.display = 'none'; document.getElementById('2403.00944v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 1 March, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> March 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2402.18946">arXiv:2402.18946</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2402.18946">pdf</a>, <a href="https://arxiv.org/format/2402.18946">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Systems and Control">eess.SY</span> </div> </div> <p class="title is-5 mathjax"> Real-Time Adaptive Safety-Critical Control with Gaussian Processes in High-Order Uncertain Models </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Zhang%2C+Y">Yu Zhang</a>, <a href="/search/cs?searchtype=author&amp;query=Wen%2C+L">Long Wen</a>, <a href="/search/cs?searchtype=author&amp;query=Yao%2C+X">Xiangtong Yao</a>, <a href="/search/cs?searchtype=author&amp;query=Bing%2C+Z">Zhenshan Bing</a>, <a href="/search/cs?searchtype=author&amp;query=Kong%2C+L">Linghuan Kong</a>, <a href="/search/cs?searchtype=author&amp;query=He%2C+W">Wei He</a>, <a href="/search/cs?searchtype=author&amp;query=Knoll%2C+A">Alois Knoll</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2402.18946v2-abstract-short" style="display: inline;"> This paper presents an adaptive online learning framework for systems with uncertain parameters to ensure safety-critical control in non-stationary environments. Our approach consists of two phases. The initial phase is centered on a novel sparse Gaussian process (GP) framework. We first integrate a forgetting factor to refine a variational sparse GP algorithm, thus enhancing its adaptability. Sub&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2402.18946v2-abstract-full').style.display = 'inline'; document.getElementById('2402.18946v2-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2402.18946v2-abstract-full" style="display: none;"> This paper presents an adaptive online learning framework for systems with uncertain parameters to ensure safety-critical control in non-stationary environments. Our approach consists of two phases. The initial phase is centered on a novel sparse Gaussian process (GP) framework. We first integrate a forgetting factor to refine a variational sparse GP algorithm, thus enhancing its adaptability. Subsequently, the hyperparameters of the Gaussian model are trained with a specially compound kernel, and the Gaussian model&#39;s online inferential capability and computational efficiency are strengthened by updating a solitary inducing point derived from new samples, in conjunction with the learned hyperparameters. In the second phase, we propose a safety filter based on high-order control barrier functions (HOCBFs), synergized with the previously trained learning model. By leveraging the compound kernel from the first phase, we effectively address the inherent limitations of GPs in handling high-dimensional problems for real-time applications. The derived controller ensures a rigorous lower bound on the probability of satisfying the safety specification. Finally, the efficacy of our proposed algorithm is demonstrated through real-time obstacle avoidance experiments executed using both a simulation platform and a real-world 7-DOF robot. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2402.18946v2-abstract-full').style.display = 'none'; document.getElementById('2402.18946v2-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 5 March, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 29 February, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2402.18925">arXiv:2402.18925</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2402.18925">pdf</a>, <a href="https://arxiv.org/format/2402.18925">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> PCDepth: Pattern-based Complementary Learning for Monocular Depth Estimation by Best of Both Worlds </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Liu%2C+H">Haotian Liu</a>, <a href="/search/cs?searchtype=author&amp;query=Qu%2C+S">Sanqing Qu</a>, <a href="/search/cs?searchtype=author&amp;query=Lu%2C+F">Fan Lu</a>, <a href="/search/cs?searchtype=author&amp;query=Bu%2C+Z">Zongtao Bu</a>, <a href="/search/cs?searchtype=author&amp;query=Roehrbein%2C+F">Florian Roehrbein</a>, <a href="/search/cs?searchtype=author&amp;query=Knoll%2C+A">Alois Knoll</a>, <a href="/search/cs?searchtype=author&amp;query=Chen%2C+G">Guang Chen</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2402.18925v1-abstract-short" style="display: inline;"> Event cameras can record scene dynamics with high temporal resolution, providing rich scene details for monocular depth estimation (MDE) even at low-level illumination. Therefore, existing complementary learning approaches for MDE fuse intensity information from images and scene details from event data for better scene understanding. However, most methods directly fuse two modalities at pixel leve&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2402.18925v1-abstract-full').style.display = 'inline'; document.getElementById('2402.18925v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2402.18925v1-abstract-full" style="display: none;"> Event cameras can record scene dynamics with high temporal resolution, providing rich scene details for monocular depth estimation (MDE) even at low-level illumination. Therefore, existing complementary learning approaches for MDE fuse intensity information from images and scene details from event data for better scene understanding. However, most methods directly fuse two modalities at pixel level, ignoring that the attractive complementarity mainly impacts high-level patterns that only occupy a few pixels. For example, event data is likely to complement contours of scene objects. In this paper, we discretize the scene into a set of high-level patterns to explore the complementarity and propose a Pattern-based Complementary learning architecture for monocular Depth estimation (PCDepth). Concretely, PCDepth comprises two primary components: a complementary visual representation learning module for discretizing the scene into high-level patterns and integrating complementary patterns across modalities and a refined depth estimator aimed at scene reconstruction and depth prediction while maintaining an efficiency-accuracy balance. Through pattern-based complementary learning, PCDepth fully exploits two modalities and achieves more accurate predictions than existing methods, especially in challenging nighttime scenarios. Extensive experiments on MVSEC and DSEC datasets verify the effectiveness and superiority of our PCDepth. Remarkably, compared with state-of-the-art, PCDepth achieves a 37.9% improvement in accuracy in MVSEC nighttime scenarios. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2402.18925v1-abstract-full').style.display = 'none'; document.getElementById('2402.18925v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 29 February, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Under Review</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2402.16449">arXiv:2402.16449</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2402.16449">pdf</a>, <a href="https://arxiv.org/format/2402.16449">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Robotics">cs.RO</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> </div> </div> <p class="title is-5 mathjax"> Online Efficient Safety-Critical Control for Mobile Robots in Unknown Dynamic Multi-Obstacle Environments </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Zhang%2C+Y">Yu Zhang</a>, <a href="/search/cs?searchtype=author&amp;query=Tian%2C+G">Guangyao Tian</a>, <a href="/search/cs?searchtype=author&amp;query=Wen%2C+L">Long Wen</a>, <a href="/search/cs?searchtype=author&amp;query=Yao%2C+X">Xiangtong Yao</a>, <a href="/search/cs?searchtype=author&amp;query=Zhang%2C+L">Liding Zhang</a>, <a href="/search/cs?searchtype=author&amp;query=Bing%2C+Z">Zhenshan Bing</a>, <a href="/search/cs?searchtype=author&amp;query=He%2C+W">Wei He</a>, <a href="/search/cs?searchtype=author&amp;query=Knoll%2C+A">Alois Knoll</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2402.16449v1-abstract-short" style="display: inline;"> This paper proposes a LiDAR-based goal-seeking and exploration framework, addressing the efficiency of online obstacle avoidance in unstructured environments populated with static and moving obstacles. This framework addresses two significant challenges associated with traditional dynamic control barrier functions (D-CBFs): their online construction and the diminished real-time performance caused&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2402.16449v1-abstract-full').style.display = 'inline'; document.getElementById('2402.16449v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2402.16449v1-abstract-full" style="display: none;"> This paper proposes a LiDAR-based goal-seeking and exploration framework, addressing the efficiency of online obstacle avoidance in unstructured environments populated with static and moving obstacles. This framework addresses two significant challenges associated with traditional dynamic control barrier functions (D-CBFs): their online construction and the diminished real-time performance caused by utilizing multiple D-CBFs. To tackle the first challenge, the framework&#39;s perception component begins with clustering point clouds via the DBSCAN algorithm, followed by encapsulating these clusters with the minimum bounding ellipses (MBEs) algorithm to create elliptical representations. By comparing the current state of MBEs with those stored from previous moments, the differentiation between static and dynamic obstacles is realized, and the Kalman filter is utilized to predict the movements of the latter. Such analysis facilitates the D-CBF&#39;s online construction for each MBE. To tackle the second challenge, we introduce buffer zones, generating Type-II D-CBFs online for each identified obstacle. Utilizing these buffer zones as activation areas substantially reduces the number of D-CBFs that need to be activated. Upon entering these buffer zones, the system prioritizes safety, autonomously navigating safe paths, and hence referred to as the exploration mode. Exiting these buffer zones triggers the system&#39;s transition to goal-seeking mode. We demonstrate that the system&#39;s states under this framework achieve safety and asymptotic stabilization. Experimental results in simulated and real-world environments have validated our framework&#39;s capability, allowing a LiDAR-equipped mobile robot to efficiently and safely reach the desired location within dynamic environments containing multiple obstacles. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2402.16449v1-abstract-full').style.display = 'none'; document.getElementById('2402.16449v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 26 February, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2024. </p> </li> </ol> <nav class="pagination is-small is-centered breathe-horizontal" role="navigation" aria-label="pagination"> <a href="" class="pagination-previous is-invisible">Previous </a> <a href="/search/?searchtype=author&amp;query=Knoll%2C+A&amp;start=50" class="pagination-next" >Next </a> <ul class="pagination-list"> <li> <a href="/search/?searchtype=author&amp;query=Knoll%2C+A&amp;start=0" class="pagination-link is-current" aria-label="Goto page 1">1 </a> </li> <li> <a href="/search/?searchtype=author&amp;query=Knoll%2C+A&amp;start=50" class="pagination-link " aria-label="Page 2" aria-current="page">2 </a> </li> <li> <a href="/search/?searchtype=author&amp;query=Knoll%2C+A&amp;start=100" class="pagination-link " aria-label="Page 3" aria-current="page">3 </a> </li> <li> <a href="/search/?searchtype=author&amp;query=Knoll%2C+A&amp;start=150" class="pagination-link " aria-label="Page 4" aria-current="page">4 </a> </li> </ul> </nav> <div class="is-hidden-tablet"> <!-- feedback for mobile only --> <span class="help" style="display: inline-block;"><a href="https://github.com/arXiv/arxiv-search/releases">Search v0.5.6 released 2020-02-24</a>&nbsp;&nbsp;</span> </div> </div> </main> <footer> <div class="columns is-desktop" role="navigation" aria-label="Secondary"> <!-- MetaColumn 1 --> <div class="column"> <div class="columns"> <div class="column"> <ul class="nav-spaced"> <li><a href="https://info.arxiv.org/about">About</a></li> <li><a href="https://info.arxiv.org/help">Help</a></li> </ul> </div> <div class="column"> <ul class="nav-spaced"> <li> <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 512 512" class="icon filter-black" role="presentation"><title>contact arXiv</title><desc>Click here to contact arXiv</desc><path d="M502.3 190.8c3.9-3.1 9.7-.2 9.7 4.7V400c0 26.5-21.5 48-48 48H48c-26.5 0-48-21.5-48-48V195.6c0-5 5.7-7.8 9.7-4.7 22.4 17.4 52.1 39.5 154.1 113.6 21.1 15.4 56.7 47.8 92.2 47.6 35.7.3 72-32.8 92.3-47.6 102-74.1 131.6-96.3 154-113.7zM256 320c23.2.4 56.6-29.2 73.4-41.4 132.7-96.3 142.8-104.7 173.4-128.7 5.8-4.5 9.2-11.5 9.2-18.9v-19c0-26.5-21.5-48-48-48H48C21.5 64 0 85.5 0 112v19c0 7.4 3.4 14.3 9.2 18.9 30.6 23.9 40.7 32.4 173.4 128.7 16.8 12.2 50.2 41.8 73.4 41.4z"/></svg> <a href="https://info.arxiv.org/help/contact.html"> Contact</a> </li> <li> <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 512 512" class="icon filter-black" role="presentation"><title>subscribe to arXiv mailings</title><desc>Click here to subscribe</desc><path d="M476 3.2L12.5 270.6c-18.1 10.4-15.8 35.6 2.2 43.2L121 358.4l287.3-253.2c5.5-4.9 13.3 2.6 8.6 8.3L176 407v80.5c0 23.6 28.5 32.9 42.5 15.8L282 426l124.6 52.2c14.2 6 30.4-2.9 33-18.2l72-432C515 7.8 493.3-6.8 476 3.2z"/></svg> <a href="https://info.arxiv.org/help/subscribe"> Subscribe</a> </li> </ul> </div> </div> </div> <!-- end MetaColumn 1 --> <!-- MetaColumn 2 --> <div class="column"> <div class="columns"> <div class="column"> <ul class="nav-spaced"> <li><a href="https://info.arxiv.org/help/license/index.html">Copyright</a></li> <li><a href="https://info.arxiv.org/help/policies/privacy_policy.html">Privacy Policy</a></li> </ul> </div> <div class="column sorry-app-links"> <ul class="nav-spaced"> <li><a href="https://info.arxiv.org/help/web_accessibility.html">Web Accessibility Assistance</a></li> <li> <p class="help"> <a class="a11y-main-link" href="https://status.arxiv.org" target="_blank">arXiv Operational Status <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 256 512" class="icon filter-dark_grey" role="presentation"><path d="M224.3 273l-136 136c-9.4 9.4-24.6 9.4-33.9 0l-22.6-22.6c-9.4-9.4-9.4-24.6 0-33.9l96.4-96.4-96.4-96.4c-9.4-9.4-9.4-24.6 0-33.9L54.3 103c9.4-9.4 24.6-9.4 33.9 0l136 136c9.5 9.4 9.5 24.6.1 34z"/></svg></a><br> Get status notifications via <a class="is-link" href="https://subscribe.sorryapp.com/24846f03/email/new" target="_blank"><svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 512 512" class="icon filter-black" role="presentation"><path d="M502.3 190.8c3.9-3.1 9.7-.2 9.7 4.7V400c0 26.5-21.5 48-48 48H48c-26.5 0-48-21.5-48-48V195.6c0-5 5.7-7.8 9.7-4.7 22.4 17.4 52.1 39.5 154.1 113.6 21.1 15.4 56.7 47.8 92.2 47.6 35.7.3 72-32.8 92.3-47.6 102-74.1 131.6-96.3 154-113.7zM256 320c23.2.4 56.6-29.2 73.4-41.4 132.7-96.3 142.8-104.7 173.4-128.7 5.8-4.5 9.2-11.5 9.2-18.9v-19c0-26.5-21.5-48-48-48H48C21.5 64 0 85.5 0 112v19c0 7.4 3.4 14.3 9.2 18.9 30.6 23.9 40.7 32.4 173.4 128.7 16.8 12.2 50.2 41.8 73.4 41.4z"/></svg>email</a> or <a class="is-link" href="https://subscribe.sorryapp.com/24846f03/slack/new" target="_blank"><svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 448 512" class="icon filter-black" role="presentation"><path d="M94.12 315.1c0 25.9-21.16 47.06-47.06 47.06S0 341 0 315.1c0-25.9 21.16-47.06 47.06-47.06h47.06v47.06zm23.72 0c0-25.9 21.16-47.06 47.06-47.06s47.06 21.16 47.06 47.06v117.84c0 25.9-21.16 47.06-47.06 47.06s-47.06-21.16-47.06-47.06V315.1zm47.06-188.98c-25.9 0-47.06-21.16-47.06-47.06S139 32 164.9 32s47.06 21.16 47.06 47.06v47.06H164.9zm0 23.72c25.9 0 47.06 21.16 47.06 47.06s-21.16 47.06-47.06 47.06H47.06C21.16 243.96 0 222.8 0 196.9s21.16-47.06 47.06-47.06H164.9zm188.98 47.06c0-25.9 21.16-47.06 47.06-47.06 25.9 0 47.06 21.16 47.06 47.06s-21.16 47.06-47.06 47.06h-47.06V196.9zm-23.72 0c0 25.9-21.16 47.06-47.06 47.06-25.9 0-47.06-21.16-47.06-47.06V79.06c0-25.9 21.16-47.06 47.06-47.06 25.9 0 47.06 21.16 47.06 47.06V196.9zM283.1 385.88c25.9 0 47.06 21.16 47.06 47.06 0 25.9-21.16 47.06-47.06 47.06-25.9 0-47.06-21.16-47.06-47.06v-47.06h47.06zm0-23.72c-25.9 0-47.06-21.16-47.06-47.06 0-25.9 21.16-47.06 47.06-47.06h117.84c25.9 0 47.06 21.16 47.06 47.06 0 25.9-21.16 47.06-47.06 47.06H283.1z"/></svg>slack</a> </p> </li> </ul> </div> </div> </div> <!-- end MetaColumn 2 --> </div> </footer> <script src="https://static.arxiv.org/static/base/1.0.0a5/js/member_acknowledgement.js"></script> </body> </html>

Pages: 1 2 3 4 5 6 7 8 9 10