<!-- Stray crawl/extraction text preserved as comments: nothing may precede the doctype in valid HTML -->
<!-- CINXE.COM -->
<!-- Search | arXiv e-print repository -->
<!DOCTYPE html> <html lang="en"> <head> <meta charset="utf-8"/> <meta name="viewport" content="width=device-width, initial-scale=1"/> <!-- new favicon config and versions by realfavicongenerator.net --> <link rel="apple-touch-icon" sizes="180x180" href="https://static.arxiv.org/static/base/1.0.0a5/images/icons/apple-touch-icon.png"> <link rel="icon" type="image/png" sizes="32x32" href="https://static.arxiv.org/static/base/1.0.0a5/images/icons/favicon-32x32.png"> <link rel="icon" type="image/png" sizes="16x16" href="https://static.arxiv.org/static/base/1.0.0a5/images/icons/favicon-16x16.png"> <link rel="manifest" href="https://static.arxiv.org/static/base/1.0.0a5/images/icons/site.webmanifest"> <link rel="mask-icon" href="https://static.arxiv.org/static/base/1.0.0a5/images/icons/safari-pinned-tab.svg" color="#b31b1b"> <link rel="shortcut icon" href="https://static.arxiv.org/static/base/1.0.0a5/images/icons/favicon.ico"> <meta name="msapplication-TileColor" content="#b31b1b"> <meta name="msapplication-config" content="images/icons/browserconfig.xml"> <meta name="theme-color" content="#b31b1b"> <!-- end favicon config --> <title>Search | arXiv e-print repository</title> <script defer src="https://static.arxiv.org/static/base/1.0.0a5/fontawesome-free-5.11.2-web/js/all.js"></script> <link rel="stylesheet" href="https://static.arxiv.org/static/base/1.0.0a5/css/arxivstyle.css" /> <script type="text/x-mathjax-config"> MathJax.Hub.Config({ messageStyle: "none", extensions: ["tex2jax.js"], jax: ["input/TeX", "output/HTML-CSS"], tex2jax: { inlineMath: [ ['$','$'], ["\\(","\\)"] ], displayMath: [ ['$$','$$'], ["\\[","\\]"] ], processEscapes: true, ignoreClass: '.*', processClass: 'mathjax.*' }, TeX: { extensions: ["AMSmath.js", "AMSsymbols.js", "noErrors.js"], noErrors: { inlineDelimiters: ["$","$"], multiLine: false, style: { "font-size": "normal", "border": "" } } }, "HTML-CSS": { availableFonts: ["TeX"] } }); </script> <script 
src="https://static.arxiv.org/MathJax-2.7.3/MathJax.js"></script> <script src="https://static.arxiv.org/static/base/1.0.0a5/js/notification.js"></script> <link rel="stylesheet" href="https://static.arxiv.org/static/search/0.5.6/css/bulma-tooltip.min.css" /> <link rel="stylesheet" href="https://static.arxiv.org/static/search/0.5.6/css/search.css" /> <script src="https://code.jquery.com/jquery-3.2.1.slim.min.js" integrity="sha256-k2WSCIexGzOj3Euiig+TlR8gA0EmPjuc79OEeY5L45g=" crossorigin="anonymous"></script> <script src="https://static.arxiv.org/static/search/0.5.6/js/fieldset.js"></script> <style> input#cf-customfield_11400 { display: none; } </style> </head> <body> <header><a href="#main-container" class="is-sr-only">Skip to main content</a> <!-- contains Cornell logo and sponsor statement --> <div class="attribution level is-marginless" role="banner"> <div class="level-left"> <a class="level-item" href="https://cornell.edu/"><img src="https://static.arxiv.org/static/base/1.0.0a5/images/cornell-reduced-white-SMALL.svg" alt="Cornell University" width="200" aria-label="logo" /></a> </div> <div class="level-right is-marginless"><p class="sponsors level-item is-marginless"><span id="support-ack-url">We gratefully acknowledge support from<br /> the Simons Foundation, <a href="https://info.arxiv.org/about/ourmembers.html">member institutions</a>, and all contributors. 
<a href="https://info.arxiv.org/about/donate.html">Donate</a></span></p></div> </div> <!-- contains arXiv identity and search bar --> <div class="identity level is-marginless"> <div class="level-left"> <div class="level-item"> <a class="arxiv" href="https://arxiv.org/" aria-label="arxiv-logo"> <img src="https://static.arxiv.org/static/base/1.0.0a5/images/arxiv-logo-one-color-white.svg" aria-label="logo" alt="arxiv logo" width="85" style="width:85px;"/> </a> </div> </div> <div class="search-block level-right"> <form class="level-item mini-search" method="GET" action="https://arxiv.org/search"> <div class="field has-addons"> <div class="control"> <input class="input is-small" type="text" name="query" placeholder="Search..." aria-label="Search term or terms" /> <p class="help"><a href="https://info.arxiv.org/help">Help</a> | <a href="https://arxiv.org/search/advanced">Advanced Search</a></p> </div> <div class="control"> <div class="select is-small"> <select name="searchtype" aria-label="Field to search"> <option value="all" selected="selected">All fields</option> <option value="title">Title</option> <option value="author">Author</option> <option value="abstract">Abstract</option> <option value="comments">Comments</option> <option value="journal_ref">Journal reference</option> <option value="acm_class">ACM classification</option> <option value="msc_class">MSC classification</option> <option value="report_num">Report number</option> <option value="paper_id">arXiv identifier</option> <option value="doi">DOI</option> <option value="orcid">ORCID</option> <option value="author_id">arXiv author ID</option> <option value="help">Help pages</option> <option value="full_text">Full text</option> </select> </div> </div> <input type="hidden" name="source" value="header"> <button class="button is-small is-cul-darker">Search</button> </div> </form> </div> </div> <!-- closes identity --> <div class="container"> <div class="user-tools is-size-7 has-text-right has-text-weight-bold" 
role="navigation" aria-label="User menu"> <a href="https://arxiv.org/login">Login</a> </div> </div> </header> <main class="container" id="main-container"> <div class="level is-marginless"> <div class="level-left"> <h1 class="title is-clearfix"> Showing 1–50 of 123 results for author: <span class="mathjax">Hu, H</span> </h1> </div> <div class="level-right is-hidden-mobile"> <!-- feedback for mobile is moved to footer --> <span class="help" style="display: inline-block;"><a href="https://github.com/arXiv/arxiv-search/releases">Search v0.5.6 released 2020-02-24</a> </span> </div> </div> <div class="content"> <form method="GET" action="/search/eess" role="search"> Searching in archive <strong>eess</strong>. <a href="/search/?searchtype=author&query=Hu%2C+H">Search in all archives.</a> <div class="field has-addons-tablet"> <div class="control is-expanded"> <label for="query" class="hidden-label">Search term or terms</label> <input class="input is-medium" id="query" name="query" placeholder="Search term..." 
type="text" value="Hu, H"> </div> <div class="select control is-medium"> <label class="is-hidden" for="searchtype">Field</label> <select class="is-medium" id="searchtype" name="searchtype"><option value="all">All fields</option><option value="title">Title</option><option selected value="author">Author(s)</option><option value="abstract">Abstract</option><option value="comments">Comments</option><option value="journal_ref">Journal reference</option><option value="acm_class">ACM classification</option><option value="msc_class">MSC classification</option><option value="report_num">Report number</option><option value="paper_id">arXiv identifier</option><option value="doi">DOI</option><option value="orcid">ORCID</option><option value="license">License (URI)</option><option value="author_id">arXiv author ID</option><option value="help">Help pages</option><option value="full_text">Full text</option></select> </div> <div class="control"> <button class="button is-link is-medium">Search</button> </div> </div> <div class="field"> <div class="control is-size-7"> <label class="radio"> <input checked id="abstracts-0" name="abstracts" type="radio" value="show"> Show abstracts </label> <label class="radio"> <input id="abstracts-1" name="abstracts" type="radio" value="hide"> Hide abstracts </label> </div> </div> <div class="is-clearfix" style="height: 2.5em"> <div class="is-pulled-right"> <a href="/search/advanced?terms-0-term=Hu%2C+H&terms-0-field=author&size=50&order=-announced_date_first">Advanced Search</a> </div> </div> <input type="hidden" name="order" value="-announced_date_first"> <input type="hidden" name="size" value="50"> </form> <div class="level breathe-horizontal"> <div class="level-left"> <form method="GET" action="/search/"> <div style="display: none;"> <select id="searchtype" name="searchtype"><option value="all">All fields</option><option value="title">Title</option><option selected value="author">Author(s)</option><option value="abstract">Abstract</option><option 
value="comments">Comments</option><option value="journal_ref">Journal reference</option><option value="acm_class">ACM classification</option><option value="msc_class">MSC classification</option><option value="report_num">Report number</option><option value="paper_id">arXiv identifier</option><option value="doi">DOI</option><option value="orcid">ORCID</option><option value="license">License (URI)</option><option value="author_id">arXiv author ID</option><option value="help">Help pages</option><option value="full_text">Full text</option></select> <input id="query" name="query" type="text" value="Hu, H"> <ul id="abstracts"><li><input checked id="abstracts-0" name="abstracts" type="radio" value="show"> <label for="abstracts-0">Show abstracts</label></li><li><input id="abstracts-1" name="abstracts" type="radio" value="hide"> <label for="abstracts-1">Hide abstracts</label></li></ul> </div> <div class="box field is-grouped is-grouped-multiline level-item"> <div class="control"> <span class="select is-small"> <select id="size" name="size"><option value="25">25</option><option selected value="50">50</option><option value="100">100</option><option value="200">200</option></select> </span> <label for="size">results per page</label>. 
</div> <div class="control"> <label for="order">Sort results by</label> <span class="select is-small"> <select id="order" name="order"><option selected value="-announced_date_first">Announcement date (newest first)</option><option value="announced_date_first">Announcement date (oldest first)</option><option value="-submitted_date">Submission date (newest first)</option><option value="submitted_date">Submission date (oldest first)</option><option value="">Relevance</option></select> </span> </div> <div class="control"> <button class="button is-small is-link">Go</button> </div> </div> </form> </div> </div> <nav class="pagination is-small is-centered breathe-horizontal" role="navigation" aria-label="pagination"> <a href="" class="pagination-previous is-invisible">Previous </a> <a href="/search/?searchtype=author&query=Hu%2C+H&start=50" class="pagination-next" >Next </a> <ul class="pagination-list"> <li> <a href="/search/?searchtype=author&query=Hu%2C+H&start=0" class="pagination-link is-current" aria-label="Goto page 1" aria-current="page">1 </a> </li> <li> <a href="/search/?searchtype=author&query=Hu%2C+H&start=50" class="pagination-link " aria-label="Page 2">2 </a> </li> <li> <a href="/search/?searchtype=author&query=Hu%2C+H&start=100" class="pagination-link " aria-label="Page 3">3 </a> </li> </ul> </nav> <ol class="breathe-horizontal" start="1"> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2502.11946">arXiv:2502.11946</a> <span> [<a href="https://arxiv.org/pdf/2502.11946">pdf</a>, <a href="https://arxiv.org/format/2502.11946">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" 
data-tooltip="Human-Computer Interaction">cs.HC</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Sound">cs.SD</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Audio and Speech Processing">eess.AS</span> </div> </div> <p class="title is-5 mathjax"> Step-Audio: Unified Understanding and Generation in Intelligent Speech Interaction </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&query=Huang%2C+A">Ailin Huang</a>, <a href="/search/eess?searchtype=author&query=Wu%2C+B">Boyong Wu</a>, <a href="/search/eess?searchtype=author&query=Wang%2C+B">Bruce Wang</a>, <a href="/search/eess?searchtype=author&query=Yan%2C+C">Chao Yan</a>, <a href="/search/eess?searchtype=author&query=Hu%2C+C">Chen Hu</a>, <a href="/search/eess?searchtype=author&query=Feng%2C+C">Chengli Feng</a>, <a href="/search/eess?searchtype=author&query=Tian%2C+F">Fei Tian</a>, <a href="/search/eess?searchtype=author&query=Shen%2C+F">Feiyu Shen</a>, <a href="/search/eess?searchtype=author&query=Li%2C+J">Jingbei Li</a>, <a href="/search/eess?searchtype=author&query=Chen%2C+M">Mingrui Chen</a>, <a href="/search/eess?searchtype=author&query=Liu%2C+P">Peng Liu</a>, <a href="/search/eess?searchtype=author&query=Miao%2C+R">Ruihang Miao</a>, <a href="/search/eess?searchtype=author&query=You%2C+W">Wang You</a>, <a href="/search/eess?searchtype=author&query=Chen%2C+X">Xi Chen</a>, <a href="/search/eess?searchtype=author&query=Yang%2C+X">Xuerui Yang</a>, <a href="/search/eess?searchtype=author&query=Huang%2C+Y">Yechang Huang</a>, <a href="/search/eess?searchtype=author&query=Zhang%2C+Y">Yuxiang Zhang</a>, <a href="/search/eess?searchtype=author&query=Gong%2C+Z">Zheng Gong</a>, <a href="/search/eess?searchtype=author&query=Zhang%2C+Z">Zixin Zhang</a>, <a href="/search/eess?searchtype=author&query=Zhou%2C+H">Hongyu Zhou</a>, <a href="/search/eess?searchtype=author&query=Sun%2C+J">Jianjian Sun</a>, <a 
href="/search/eess?searchtype=author&query=Li%2C+B">Brian Li</a>, <a href="/search/eess?searchtype=author&query=Feng%2C+C">Chengting Feng</a>, <a href="/search/eess?searchtype=author&query=Wan%2C+C">Changyi Wan</a>, <a href="/search/eess?searchtype=author&query=Hu%2C+H">Hanpeng Hu</a> , et al. (120 additional authors not shown) </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2502.11946v2-abstract-short" style="display: inline;"> Real-time speech interaction, serving as a fundamental interface for human-machine collaboration, holds immense potential. However, current open-source models face limitations such as high costs in voice data collection, weakness in dynamic control, and limited intelligence. To address these challenges, this paper introduces Step-Audio, the first production-ready open-source solution. Key contribu… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.11946v2-abstract-full').style.display = 'inline'; document.getElementById('2502.11946v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2502.11946v2-abstract-full" style="display: none;"> Real-time speech interaction, serving as a fundamental interface for human-machine collaboration, holds immense potential. However, current open-source models face limitations such as high costs in voice data collection, weakness in dynamic control, and limited intelligence. To address these challenges, this paper introduces Step-Audio, the first production-ready open-source solution. 
Key contributions include: 1) a 130B-parameter unified speech-text multi-modal model that achieves unified understanding and generation, with the Step-Audio-Chat version open-sourced; 2) a generative speech data engine that establishes an affordable voice cloning framework and produces the open-sourced lightweight Step-Audio-TTS-3B model through distillation; 3) an instruction-driven fine control system enabling dynamic adjustments across dialects, emotions, singing, and RAP; 4) an enhanced cognitive architecture augmented with tool calling and role-playing abilities to manage complex tasks effectively. Based on our new StepEval-Audio-360 evaluation benchmark, Step-Audio achieves state-of-the-art performance in human evaluations, especially in terms of instruction following. On open-source benchmarks like LLaMA Question, shows 9.3% average performance improvement, demonstrating our commitment to advancing the development of open-source multi-modal language technologies. Our code and models are available at https://github.com/stepfun-ai/Step-Audio. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.11946v2-abstract-full').style.display = 'none'; document.getElementById('2502.11946v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 18 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 17 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2025. 
</p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2501.18758">arXiv:2501.18758</a> <span> [<a href="https://arxiv.org/pdf/2501.18758">pdf</a>, <a href="https://arxiv.org/format/2501.18758">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Information Theory">cs.IT</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Image and Video Processing">eess.IV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Statistics Theory">math.ST</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Applications">stat.AP</span> </div> </div> <p class="title is-5 mathjax"> A New Statistical Approach to the Performance Analysis of Vision-based Localization </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&query=Hu%2C+H">Haozhou Hu</a>, <a href="/search/eess?searchtype=author&query=Dhillon%2C+H+S">Harpreet S. Dhillon</a>, <a href="/search/eess?searchtype=author&query=Buehrer%2C+R+M">R. Michael Buehrer</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2501.18758v1-abstract-short" style="display: inline;"> Many modern wireless devices with accurate positioning needs also have access to vision sensors, such as a camera, radar, and Light Detection and Ranging (LiDAR). In scenarios where wireless-based positioning is either inaccurate or unavailable, using information from vision sensors becomes highly desirable for determining the precise location of the wireless device. 
Specifically, vision data can… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2501.18758v1-abstract-full').style.display = 'inline'; document.getElementById('2501.18758v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2501.18758v1-abstract-full" style="display: none;"> Many modern wireless devices with accurate positioning needs also have access to vision sensors, such as a camera, radar, and Light Detection and Ranging (LiDAR). In scenarios where wireless-based positioning is either inaccurate or unavailable, using information from vision sensors becomes highly desirable for determining the precise location of the wireless device. Specifically, vision data can be used to estimate distances between the target (where the sensors are mounted) and nearby landmarks. However, a significant challenge in positioning using these measurements is the inability to uniquely identify which specific landmark is visible in the data. For instance, when the target is located close to a lamppost, it becomes challenging to precisely identify the specific lamppost (among several in the region) that is near the target. This work proposes a new framework for target localization using range measurements to multiple proximate landmarks. The geometric constraints introduced by these measurements are utilized to narrow down candidate landmark combinations corresponding to the range measurements and, consequently, the target's location on a map. By modeling landmarks as a marked Poisson point process (PPP), we show that three noise-free range measurements are sufficient to uniquely determine the correct combination of landmarks in a two-dimensional plane. For noisy measurements, we provide a mathematical characterization of the probability of correctly identifying the observed landmark combination based on a novel joint distribution of key random variables. 
Our results demonstrate that the landmark combination can be identified using ranges, even when individual landmarks are visually indistinguishable. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2501.18758v1-abstract-full').style.display = 'none'; document.getElementById('2501.18758v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 30 January, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> January 2025. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">14 pages</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2501.15496">arXiv:2501.15496</a> <span> [<a href="https://arxiv.org/pdf/2501.15496">pdf</a>, <a href="https://arxiv.org/format/2501.15496">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Audio and Speech Processing">eess.AS</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Sound">cs.SD</span> </div> </div> <p class="title is-5 mathjax"> Variational Bayesian Adaptive Learning of Deep Latent Variables for Acoustic Knowledge Transfer </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&query=Hu%2C+H">Hu Hu</a>, <a href="/search/eess?searchtype=author&query=Siniscalchi%2C+S+M">Sabato Marco Siniscalchi</a>, <a href="/search/eess?searchtype=author&query=Yang%2C+C+H">Chao-Han Huck Yang</a>, <a href="/search/eess?searchtype=author&query=Lee%2C+C">Chin-Hui Lee</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark 
mathjax" id="2501.15496v1-abstract-short" style="display: inline;"> In this work, we propose a novel variational Bayesian adaptive learning approach for cross-domain knowledge transfer to address acoustic mismatches between training and testing conditions, such as recording devices and environmental noise. Different from the traditional Bayesian approaches that impose uncertainties on model parameters risking the curse of dimensionality due to the huge number of p… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2501.15496v1-abstract-full').style.display = 'inline'; document.getElementById('2501.15496v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2501.15496v1-abstract-full" style="display: none;"> In this work, we propose a novel variational Bayesian adaptive learning approach for cross-domain knowledge transfer to address acoustic mismatches between training and testing conditions, such as recording devices and environmental noise. Different from the traditional Bayesian approaches that impose uncertainties on model parameters risking the curse of dimensionality due to the huge number of parameters, we focus on estimating a manageable number of latent variables in deep neural models. Knowledge learned from a source domain is thus encoded in prior distributions of deep latent variables and optimally combined, in a Bayesian sense, with a small set of adaptation data from a target domain to approximate the corresponding posterior distributions. Two different strategies are proposed and investigated to estimate the posterior distributions: Gaussian mean-field variational inference, and empirical Bayes. These strategies address the presence or absence of parallel data in the source and target domains. Furthermore, structural relationship modeling is investigated to enhance the approximation. 
We evaluated our proposed approaches on two acoustic adaptation tasks: 1) device adaptation for acoustic scene classification, and 2) noise adaptation for spoken command recognition. Experimental results show that the proposed variational Bayesian adaptive learning approach can obtain good improvements on target domain data, and consistently outperforms state-of-the-art knowledge transfer methods. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2501.15496v1-abstract-full').style.display = 'none'; document.getElementById('2501.15496v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 26 January, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> January 2025. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Accepted to TASLP</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2412.19134">arXiv:2412.19134</a> <span> [<a href="https://arxiv.org/pdf/2412.19134">pdf</a>, <a href="https://arxiv.org/format/2412.19134">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Image and Video Processing">eess.IV</span> </div> </div> <p class="title is-5 mathjax"> Extended Cross-Modality United Learning for Unsupervised Visible-Infrared Person Re-identification </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&query=Wu%2C+R">Ruixing Wu</a>, <a 
href="/search/eess?searchtype=author&query=Yang%2C+Y">Yiming Yang</a>, <a href="/search/eess?searchtype=author&query=He%2C+J">Jiakai He</a>, <a href="/search/eess?searchtype=author&query=Hu%2C+H">Haifeng Hu</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2412.19134v1-abstract-short" style="display: inline;"> Unsupervised learning visible-infrared person re-identification (USL-VI-ReID) aims to learn modality-invariant features from unlabeled cross-modality datasets and reduce the inter-modality gap. However, the existing methods lack cross-modality clustering or excessively pursue cluster-level association, which makes it difficult to perform reliable modality-invariant features learning. To deal with… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2412.19134v1-abstract-full').style.display = 'inline'; document.getElementById('2412.19134v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2412.19134v1-abstract-full" style="display: none;"> Unsupervised learning visible-infrared person re-identification (USL-VI-ReID) aims to learn modality-invariant features from unlabeled cross-modality datasets and reduce the inter-modality gap. However, the existing methods lack cross-modality clustering or excessively pursue cluster-level association, which makes it difficult to perform reliable modality-invariant features learning. To deal with this issue, we propose a Extended Cross-Modality United Learning (ECUL) framework, incorporating Extended Modality-Camera Clustering (EMCC) and Two-Step Memory Updating Strategy (TSMem) modules. 
Specifically, we design ECUL to naturally integrates intra-modality clustering, inter-modality clustering and inter-modality instance selection, establishing compact and accurate cross-modality associations while reducing the introduction of noisy labels. Moreover, EMCC captures and filters the neighborhood relationships by extending the encoding vector, which further promotes the learning of modality-invariant and camera-invariant knowledge in terms of clustering algorithm. Finally, TSMem provides accurate and generalized proxy points for contrastive learning by updating the memory in stages. Extensive experiments results on SYSU-MM01 and RegDB datasets demonstrate that the proposed ECUL shows promising performance and even outperforms certain supervised methods. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2412.19134v1-abstract-full').style.display = 'none'; document.getElementById('2412.19134v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 26 December, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> December 2024. 
</p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2412.13037">arXiv:2412.13037</a> <span> [<a href="https://arxiv.org/pdf/2412.13037">pdf</a>, <a href="https://arxiv.org/format/2412.13037">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Sound">cs.SD</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Audio and Speech Processing">eess.AS</span> </div> </div> <p class="title is-5 mathjax"> TAME: Temporal Audio-based Mamba for Enhanced Drone Trajectory Estimation and Classification </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&query=Xiao%2C+Z">Zhenyuan Xiao</a>, <a href="/search/eess?searchtype=author&query=Hu%2C+H">Huanran Hu</a>, <a href="/search/eess?searchtype=author&query=Xu%2C+G">Guili Xu</a>, <a href="/search/eess?searchtype=author&query=He%2C+J">Junwei He</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2412.13037v6-abstract-short" style="display: inline;"> The increasing prevalence of compact UAVs has introduced significant risks to public safety, while traditional drone detection systems are often bulky and costly. To address these challenges, we present TAME, the Temporal Audio-based Mamba for Enhanced Drone Trajectory Estimation and Classification. 
This innovative anti-UAV detection model leverages a parallel selective state-space model to simult… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2412.13037v6-abstract-full').style.display = 'inline'; document.getElementById('2412.13037v6-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2412.13037v6-abstract-full" style="display: none;"> The increasing prevalence of compact UAVs has introduced significant risks to public safety, while traditional drone detection systems are often bulky and costly. To address these challenges, we present TAME, the Temporal Audio-based Mamba for Enhanced Drone Trajectory Estimation and Classification. This innovative anti-UAV detection model leverages a parallel selective state-space model to simultaneously capture and learn both the temporal and spectral features of audio, effectively analyzing propagation of sound. To further enhance temporal features, we introduce a Temporal Feature Enhancement Module, which integrates spectral features into temporal data using residual cross-attention. This enhanced temporal information is then employed for precise 3D trajectory estimation and classification. Our model sets a new standard of performance on the MMUAD benchmarks, demonstrating superior accuracy and effectiveness. The code and trained models are publicly available on GitHub \url{https://github.com/AmazingDay1/TAME}. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2412.13037v6-abstract-full').style.display = 'none'; document.getElementById('2412.13037v6-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 19 January, 2025; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 17 December, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> December 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">This paper has been accepted for presentation at the IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP) 2025. Personal use of this material is permitted. Permission from IEEE must be obtained for all other uses</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2411.15643">arXiv:2411.15643</a> <span> [<a href="https://arxiv.org/pdf/2411.15643">pdf</a>, <a href="https://arxiv.org/format/2411.15643">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Systems and Control">eess.SY</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Neural and Evolutionary Computing">cs.NE</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Robotics">cs.RO</span> </div> </div> <p class="title is-5 mathjax"> On the Boundary Feasibility for PDE Control with Neural Operators </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&query=Hu%2C+H">Hanjiang Hu</a>, <a 
href="/search/eess?searchtype=author&query=Liu%2C+C">Changliu Liu</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2411.15643v1-abstract-short" style="display: inline;"> The physical world dynamics are generally governed by underlying partial differential equations (PDEs) with unknown analytical forms in science and engineering problems. Neural network based data-driven approaches have been heavily studied in simulating and solving PDE problems in recent years, but it is still challenging to move forward from understanding to controlling the unknown PDE dynamics.… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.15643v1-abstract-full').style.display = 'inline'; document.getElementById('2411.15643v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2411.15643v1-abstract-full" style="display: none;"> The physical world dynamics are generally governed by underlying partial differential equations (PDEs) with unknown analytical forms in science and engineering problems. Neural network based data-driven approaches have been heavily studied in simulating and solving PDE problems in recent years, but it is still challenging to move forward from understanding to controlling the unknown PDE dynamics. PDE boundary control instantiates a simplified but important problem by only focusing on PDE boundary conditions as the control input and output. However, current model-free PDE controllers cannot ensure the boundary output satisfies some given user-specified safety constraint. To this end, we propose a safety filtering framework to guarantee the boundary output stays within the safe set for current model-free controllers. 
Specifically, we first introduce a general neural boundary control barrier function (BCBF) to ensure the feasibility of the trajectorywise constraint satisfaction of boundary output. Based on a neural operator modeling the transfer function from boundary control input to output trajectories, we show that the change in the BCBF depends linearly on the change in input boundary, so quadratic programming-based safety filtering can be done for pre-trained model-free controllers. Extensive experiments under challenging hyperbolic, parabolic and Navier-Stokes PDE dynamics environments validate the effectiveness of the proposed method in achieving better general performance and boundary constraint satisfaction compared to the model-free controller baselines. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.15643v1-abstract-full').style.display = 'none'; document.getElementById('2411.15643v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 23 November, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> November 2024. 
</p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">27 pages, 5 figures, 8 tables</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2411.10739">arXiv:2411.10739</a> <span> [<a href="https://arxiv.org/pdf/2411.10739">pdf</a>, <a href="https://arxiv.org/format/2411.10739">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Systems and Control">eess.SY</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Signal Processing">eess.SP</span> </div> </div> <p class="title is-5 mathjax"> A Wearable Gait Monitoring System for 17 Gait Parameters Based on Computer Vision </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&query=Chen%2C+J">Jiangang Chen</a>, <a href="/search/eess?searchtype=author&query=Sun%2C+Y">Yung-Hong Sun</a>, <a href="/search/eess?searchtype=author&query=Pickett%2C+K">Kristen Pickett</a>, <a href="/search/eess?searchtype=author&query=King%2C+B">Barbara King</a>, <a href="/search/eess?searchtype=author&query=Hu%2C+Y+H">Yu Hen Hu</a>, <a href="/search/eess?searchtype=author&query=Jiang%2C+H">Hongrui Jiang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2411.10739v1-abstract-short" style="display: inline;"> We developed a shoe-mounted gait monitoring system capable of tracking up to 17 gait parameters, including gait length, step time, stride velocity, and others. 
The system employs a stereo camera mounted on one shoe to track a marker placed on the opposite shoe, enabling the estimation of spatial gait parameters. Additionally, a Force Sensitive Resistor (FSR) affixed to the heel of the shoe, combin… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.10739v1-abstract-full').style.display = 'inline'; document.getElementById('2411.10739v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2411.10739v1-abstract-full" style="display: none;"> We developed a shoe-mounted gait monitoring system capable of tracking up to 17 gait parameters, including gait length, step time, stride velocity, and others. The system employs a stereo camera mounted on one shoe to track a marker placed on the opposite shoe, enabling the estimation of spatial gait parameters. Additionally, a Force Sensitive Resistor (FSR) affixed to the heel of the shoe, combined with a custom-designed algorithm, is utilized to measure temporal gait parameters. Through testing on multiple participants and comparison with the gait mat, the proposed gait monitoring system exhibited notable performance, with the accuracy of all measured gait parameters exceeding 93.61%. The system also demonstrated a low drift of 4.89% during long-distance walking. A gait identification task conducted on participants using a trained Transformer model achieved 95.7% accuracy on the dataset collected by the proposed system, demonstrating that our hardware has the potential to collect long-sequence gait data suitable for integration with current Large Language Models (LLMs). The system is cost-effective, user-friendly, and well-suited for real-life measurements. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.10739v1-abstract-full').style.display = 'none'; document.getElementById('2411.10739v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 16 November, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> November 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">13 pages, 14 figures. This paper was submitted for publication to the IEEE Transactions on Instrumentation and Measurement</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2411.09154">arXiv:2411.09154</a> <span> [<a href="https://arxiv.org/pdf/2411.09154">pdf</a>, <a href="https://arxiv.org/ps/2411.09154">ps</a>, <a href="https://arxiv.org/format/2411.09154">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Information Theory">cs.IT</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Signal Processing">eess.SP</span> </div> </div> <p class="title is-5 mathjax"> STAR-RIS Enabled ISAC Systems: Joint Rate Splitting and Beamforming Optimization </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&query=Liu%2C+Y">Yuan Liu</a>, <a href="/search/eess?searchtype=author&query=Zhang%2C+R">Ruichen Zhang</a>, <a href="/search/eess?searchtype=author&query=Jiang%2C+R">Ruihong Jiang</a>, <a href="/search/eess?searchtype=author&query=Zhu%2C+Y">Yongdong Zhu</a>, <a href="/search/eess?searchtype=author&query=Hu%2C+H">Huimin Hu</a>, <a href="/search/eess?searchtype=author&query=Ni%2C+Q">Qiang Ni</a>, <a 
href="/search/eess?searchtype=author&query=Fei%2C+Z">Zesong Fei</a>, <a href="/search/eess?searchtype=author&query=Niyato%2C+D">Dusit Niyato</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2411.09154v1-abstract-short" style="display: inline;"> This paper delves into an integrated sensing and communication (ISAC) system bolstered by a simultaneously transmitting and reflecting reconfigurable intelligent surface (STAR-RIS). Within this system, a base station (BS) is equipped with communication and radar capabilities, enabling it to communicate with ground terminals (GTs) and concurrently probe for echo signals from a target of interest. M… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.09154v1-abstract-full').style.display = 'inline'; document.getElementById('2411.09154v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2411.09154v1-abstract-full" style="display: none;"> This paper delves into an integrated sensing and communication (ISAC) system bolstered by a simultaneously transmitting and reflecting reconfigurable intelligent surface (STAR-RIS). Within this system, a base station (BS) is equipped with communication and radar capabilities, enabling it to communicate with ground terminals (GTs) and concurrently probe for echo signals from a target of interest. Moreover, to manage interference and improve communication quality, the rate splitting multiple access (RSMA) scheme is incorporated into the system. The signal-to-interference-plus-noise ratio (SINR) of the received sensing echo signals is a measure of sensing performance. We formulate a joint optimization problem of common rates, transmit beamforming at the BS, and passive beamforming vectors of the STAR-RIS. 
The objective is to maximize sensing SINR while guaranteeing the communication rate requirements for each GT. We present an iterative algorithm to address the non-convex problem by invoking Dinkelbach's transform, semidefinite relaxation (SDR), majorization-minimization, and sequential rank-one constraint relaxation (SROCR) theories. Simulation results manifest that the performance of the studied ISAC network enhanced by the STAR-RIS and RSMA surpasses other benchmarks considerably. The results evidently indicate the superior performance improvement of the ISAC system with the proposed RSMA-based transmission strategy design and the dynamic optimization of both transmission and reflection beamforming at STAR-RIS. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.09154v1-abstract-full').style.display = 'none'; document.getElementById('2411.09154v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 13 November, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> November 2024. 
</p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">13 pages, 9 figures</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2411.00499">arXiv:2411.00499</a> <span> [<a href="https://arxiv.org/pdf/2411.00499">pdf</a>, <a href="https://arxiv.org/format/2411.00499">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Emerging Technologies">cs.ET</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Signal Processing">eess.SP</span> </div> </div> <p class="title is-5 mathjax"> Cross-modal semantic segmentation for indoor environmental perception using single-chip millimeter-wave radar raw data </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&query=Hu%2C+H">Hairuo Hu</a>, <a href="/search/eess?searchtype=author&query=Cong%2C+H">Haiyong Cong</a>, <a href="/search/eess?searchtype=author&query=Shao%2C+Z">Zhuyu Shao</a>, <a href="/search/eess?searchtype=author&query=Bi%2C+Y">Yubo Bi</a>, <a href="/search/eess?searchtype=author&query=Liu%2C+J">Jinghao Liu</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2411.00499v2-abstract-short" style="display: inline;"> In the context of firefighting and rescue operations, a cross-modal semantic segmentation model based on a single-chip millimeter-wave (mmWave) radar for indoor environmental perception is proposed and discussed. 
To efficiently obtain high-quality labels, an automatic label generation method utilizing LiDAR point clouds and occupancy grid maps is introduced. The proposed segmentation model is base… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.00499v2-abstract-full').style.display = 'inline'; document.getElementById('2411.00499v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2411.00499v2-abstract-full" style="display: none;"> In the context of firefighting and rescue operations, a cross-modal semantic segmentation model based on a single-chip millimeter-wave (mmWave) radar for indoor environmental perception is proposed and discussed. To efficiently obtain high-quality labels, an automatic label generation method utilizing LiDAR point clouds and occupancy grid maps is introduced. The proposed segmentation model is based on U-Net. A spatial attention module is incorporated, which enhanced the performance of the mode. The results demonstrate that cross-modal semantic segmentation provides a more intuitive and accurate representation of indoor environments. Unlike traditional methods, the model's segmentation performance is minimally affected by azimuth. Although performance declines with increasing distance, this can be mitigated by a well-designed model. Additionally, it was found that using raw ADC data as input is ineffective; compared to RA tensors, RD tensors are more suitable for the proposed model. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.00499v2-abstract-full').style.display = 'none'; document.getElementById('2411.00499v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 6 December, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 1 November, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> November 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">5291 words, 17 pages, 11 figures</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2410.21276">arXiv:2410.21276</a> <span> [<a href="https://arxiv.org/pdf/2410.21276">pdf</a>, <a href="https://arxiv.org/format/2410.21276">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computers and Society">cs.CY</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Sound">cs.SD</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Audio and Speech Processing">eess.AS</span> </div> </div> <p class="title is-5 mathjax"> GPT-4o System Card </p> <p class="authors"> <span class="search-hit">Authors:</span> <a 
href="/search/eess?searchtype=author&query=OpenAI"> OpenAI</a>, <a href="/search/eess?searchtype=author&query=%3A"> :</a>, <a href="/search/eess?searchtype=author&query=Hurst%2C+A">Aaron Hurst</a>, <a href="/search/eess?searchtype=author&query=Lerer%2C+A">Adam Lerer</a>, <a href="/search/eess?searchtype=author&query=Goucher%2C+A+P">Adam P. Goucher</a>, <a href="/search/eess?searchtype=author&query=Perelman%2C+A">Adam Perelman</a>, <a href="/search/eess?searchtype=author&query=Ramesh%2C+A">Aditya Ramesh</a>, <a href="/search/eess?searchtype=author&query=Clark%2C+A">Aidan Clark</a>, <a href="/search/eess?searchtype=author&query=Ostrow%2C+A">AJ Ostrow</a>, <a href="/search/eess?searchtype=author&query=Welihinda%2C+A">Akila Welihinda</a>, <a href="/search/eess?searchtype=author&query=Hayes%2C+A">Alan Hayes</a>, <a href="/search/eess?searchtype=author&query=Radford%2C+A">Alec Radford</a>, <a href="/search/eess?searchtype=author&query=M%C4%85dry%2C+A">Aleksander Mądry</a>, <a href="/search/eess?searchtype=author&query=Baker-Whitcomb%2C+A">Alex Baker-Whitcomb</a>, <a href="/search/eess?searchtype=author&query=Beutel%2C+A">Alex Beutel</a>, <a href="/search/eess?searchtype=author&query=Borzunov%2C+A">Alex Borzunov</a>, <a href="/search/eess?searchtype=author&query=Carney%2C+A">Alex Carney</a>, <a href="/search/eess?searchtype=author&query=Chow%2C+A">Alex Chow</a>, <a href="/search/eess?searchtype=author&query=Kirillov%2C+A">Alex Kirillov</a>, <a href="/search/eess?searchtype=author&query=Nichol%2C+A">Alex Nichol</a>, <a href="/search/eess?searchtype=author&query=Paino%2C+A">Alex Paino</a>, <a href="/search/eess?searchtype=author&query=Renzin%2C+A">Alex Renzin</a>, <a href="/search/eess?searchtype=author&query=Passos%2C+A+T">Alex Tachard Passos</a>, <a href="/search/eess?searchtype=author&query=Kirillov%2C+A">Alexander Kirillov</a>, <a href="/search/eess?searchtype=author&query=Christakis%2C+A">Alexi Christakis</a> , et al. 
(395 additional authors not shown) </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2410.21276v1-abstract-short" style="display: inline;"> GPT-4o is an autoregressive omni model that accepts as input any combination of text, audio, image, and video, and generates any combination of text, audio, and image outputs. It's trained end-to-end across text, vision, and audio, meaning all inputs and outputs are processed by the same neural network. GPT-4o can respond to audio inputs in as little as 232 milliseconds, with an average of 320 mil… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.21276v1-abstract-full').style.display = 'inline'; document.getElementById('2410.21276v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2410.21276v1-abstract-full" style="display: none;"> GPT-4o is an autoregressive omni model that accepts as input any combination of text, audio, image, and video, and generates any combination of text, audio, and image outputs. It's trained end-to-end across text, vision, and audio, meaning all inputs and outputs are processed by the same neural network. GPT-4o can respond to audio inputs in as little as 232 milliseconds, with an average of 320 milliseconds, which is similar to human response time in conversation. It matches GPT-4 Turbo performance on text in English and code, with significant improvement on text in non-English languages, while also being much faster and 50\% cheaper in the API. GPT-4o is especially better at vision and audio understanding compared to existing models. In line with our commitment to building AI safely and consistent with our voluntary commitments to the White House, we are sharing the GPT-4o System Card, which includes our Preparedness Framework evaluations. 
In this System Card, we provide a detailed look at GPT-4o's capabilities, limitations, and safety evaluations across multiple categories, focusing on speech-to-speech while also evaluating text and image capabilities, and measures we've implemented to ensure the model is safe and aligned. We also include third-party assessments on dangerous capabilities, as well as discussion of potential societal impacts of GPT-4o's text and vision capabilities. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.21276v1-abstract-full').style.display = 'none'; document.getElementById('2410.21276v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 25 October, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2410.16281">arXiv:2410.16281</a> <span> [<a href="https://arxiv.org/pdf/2410.16281">pdf</a>, <a href="https://arxiv.org/format/2410.16281">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Robotics">cs.RO</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Systems and Control">eess.SY</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Optimization and Control">math.OC</span> </div> </div> <p class="title is-5 mathjax"> Verification of Neural Control Barrier Functions with Symbolic Derivative Bounds Propagation </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&query=Hu%2C+H">Hanjiang Hu</a>, <a href="/search/eess?searchtype=author&query=Yang%2C+Y">Yujie 
Yang</a>, <a href="/search/eess?searchtype=author&query=Wei%2C+T">Tianhao Wei</a>, <a href="/search/eess?searchtype=author&query=Liu%2C+C">Changliu Liu</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2410.16281v1-abstract-short" style="display: inline;"> Control barrier functions (CBFs) are important in safety-critical systems and robot control applications. Neural networks have been used to parameterize and synthesize CBFs with bounded control input for complex systems. However, it is still challenging to verify pre-trained neural networks CBFs (neural CBFs) in an efficient symbolic manner. To this end, we propose a new efficient verification fra… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.16281v1-abstract-full').style.display = 'inline'; document.getElementById('2410.16281v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2410.16281v1-abstract-full" style="display: none;"> Control barrier functions (CBFs) are important in safety-critical systems and robot control applications. Neural networks have been used to parameterize and synthesize CBFs with bounded control input for complex systems. However, it is still challenging to verify pre-trained neural networks CBFs (neural CBFs) in an efficient symbolic manner. To this end, we propose a new efficient verification framework for ReLU-based neural CBFs through symbolic derivative bound propagation by combining the linearly bounded nonlinear dynamic system and the gradient bounds of neural CBFs. Specifically, with Heaviside step function form for derivatives of activation functions, we show that the symbolic bounds can be propagated through the inner product of neural CBF Jacobian and nonlinear system dynamics. 
Through extensive experiments on different robot dynamics, our results outperform the interval arithmetic based baselines in verified rate and verification time along the CBF boundary, validating the effectiveness and efficiency of the proposed method with different model complexity. The code can be found at https://github.com/intelligent-control-lab/verify-neural-CBF. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.16281v1-abstract-full').style.display = 'none'; document.getElementById('2410.16281v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 4 October, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Accepted to CoRL 2024, 18 pages, 6 figures, 4 tables</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2410.10387">arXiv:2410.10387</a> <span> [<a href="https://arxiv.org/pdf/2410.10387">pdf</a>, <a href="https://arxiv.org/format/2410.10387">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Systems and Control">eess.SY</span> </div> </div> <p class="title is-5 mathjax"> Robust Tracking Control with Neural Network Dynamic Models under Input Perturbations </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&query=Cheng%2C+H">Huixuan Cheng</a>, <a href="/search/eess?searchtype=author&query=Hu%2C+H">Hanjiang Hu</a>, <a href="/search/eess?searchtype=author&query=Liu%2C+C">Changliu Liu</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis 
has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2410.10387v1-abstract-short" style="display: inline;"> Robust control problem has significant practical implication since external disturbances can significantly impact the performance of control method. Existing robust control method excels at control-affine system but fails at neural network dynamic models. Developing robust control methods for such systems remains a complex challenge. In this paper, we focus on robust tracking method for neural net… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.10387v1-abstract-full').style.display = 'inline'; document.getElementById('2410.10387v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2410.10387v1-abstract-full" style="display: none;"> Robust control problem has significant practical implication since external disturbances can significantly impact the performance of control method. Existing robust control method excels at control-affine system but fails at neural network dynamic models. Developing robust control methods for such systems remains a complex challenge. In this paper, we focus on robust tracking method for neural network dynamic models. We first propose reachability analysis tool designed for this system and then introduce how to reformulate robust tracking problem with the reachable sets. In addition, we prove the existence of feedback policy that bounds the growth of reachable set over infinite horizon. The effectiveness of proposed approach is validated through numerical tracking task simulations, where we compare it with a standard tube MPC method. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.10387v1-abstract-full').style.display = 'none'; document.getElementById('2410.10387v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 14 October, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">8 pages, 8 figures, conference</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2409.16115">arXiv:2409.16115</a> <span> [<a href="https://arxiv.org/pdf/2409.16115">pdf</a>, <a href="https://arxiv.org/format/2409.16115">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Systems and Control">eess.SY</span> </div> </div> <p class="title is-5 mathjax"> Mean Age of Information in Partial Offloading Mobile Edge Computing Networks </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&query=Dong%2C+Y">Ying Dong</a>, <a href="/search/eess?searchtype=author&query=Xiao%2C+H">Hang Xiao</a>, <a href="/search/eess?searchtype=author&query=Hu%2C+H">Haonan Hu</a>, <a href="/search/eess?searchtype=author&query=Zhang%2C+J">Jiliang Zhang</a>, <a href="/search/eess?searchtype=author&query=Chen%2C+Q">Qianbin Chen</a>, <a href="/search/eess?searchtype=author&query=Zhang%2C+J">Jie Zhang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2409.16115v1-abstract-short" style="display: inline;"> The age of information (AoI) performance analysis is essential 
for evaluating the information freshness in the large-scale mobile edge computing (MEC) networks. This work proposes the earliest analysis of the mean AoI (MAoI) performance of large-scale partial offloading MEC networks. Firstly, we derive and validate the closed-form expressions of MAoI by using queueing theory and stochastic geometr… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2409.16115v1-abstract-full').style.display = 'inline'; document.getElementById('2409.16115v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2409.16115v1-abstract-full" style="display: none;"> The age of information (AoI) performance analysis is essential for evaluating the information freshness in the large-scale mobile edge computing (MEC) networks. This work proposes the earliest analysis of the mean AoI (MAoI) performance of large-scale partial offloading MEC networks. Firstly, we derive and validate the closed-form expressions of MAoI by using queueing theory and stochastic geometry. Based on these expressions, we analyse the effects of computing offloading ratio (COR) and task generation rate (TGR) on the MAoI performance and compare the MAoI performance under the local computing, remote computing, and partial offloading schemes. The results show that by jointly optimising the COR and TGR, the partial offloading scheme outperforms the local and remote computing schemes in terms of the MAoI, which can be improved by up to 51% and 61%, respectively. This encourages the MEC networks to adopt the partial offloading scheme to improve the MAoI performance. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2409.16115v1-abstract-full').style.display = 'none'; document.getElementById('2409.16115v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 24 September, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> September 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2409.13867">arXiv:2409.13867</a> <span> [<a href="https://arxiv.org/pdf/2409.13867">pdf</a>, <a href="https://arxiv.org/format/2409.13867">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Robotics">cs.RO</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Systems and Control">eess.SY</span> </div> </div> <p class="title is-5 mathjax"> MAGICS: Adversarial RL with Minimax Actors Guided by Implicit Critic Stackelberg for Convergent Neural Synthesis of Robot Safety </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&query=Wang%2C+J">Justin Wang</a>, <a href="/search/eess?searchtype=author&query=Hu%2C+H">Haimin Hu</a>, <a href="/search/eess?searchtype=author&query=Nguyen%2C+D+P">Duy Phuong Nguyen</a>, <a href="/search/eess?searchtype=author&query=Fisac%2C+J+F">Jaime Fernández Fisac</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2409.13867v1-abstract-short" style="display: inline;"> While robust
optimal control theory provides a rigorous framework to compute robot control policies that are provably safe, it struggles to scale to high-dimensional problems, leading to increased use of deep learning for tractable synthesis of robot safety. Unfortunately, existing neural safety synthesis methods often lack convergence guarantees and solution interpretability. In this paper, we pr… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2409.13867v1-abstract-full').style.display = 'inline'; document.getElementById('2409.13867v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2409.13867v1-abstract-full" style="display: none;"> While robust optimal control theory provides a rigorous framework to compute robot control policies that are provably safe, it struggles to scale to high-dimensional problems, leading to increased use of deep learning for tractable synthesis of robot safety. Unfortunately, existing neural safety synthesis methods often lack convergence guarantees and solution interpretability. In this paper, we present Minimax Actors Guided by Implicit Critic Stackelberg (MAGICS), a novel adversarial reinforcement learning (RL) algorithm that guarantees local convergence to a minimax equilibrium solution. We then build on this approach to provide local convergence guarantees for a general deep RL-based robot safety synthesis algorithm. Through both simulation studies on OpenAI Gym environments and hardware experiments with a 36-dimensional quadruped robot, we show that MAGICS can yield robust control policies outperforming the state-of-the-art neural safety synthesis methods. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2409.13867v1-abstract-full').style.display = 'none'; document.getElementById('2409.13867v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 20 September, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> September 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Algorithmic Foundations of Robotics (WAFR) XVI</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2409.12454">arXiv:2409.12454</a> <span> [<a href="https://arxiv.org/pdf/2409.12454">pdf</a>, <a href="https://arxiv.org/format/2409.12454">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Signal Processing">eess.SP</span> </div> </div> <p class="title is-5 mathjax"> FoME: A Foundation Model for EEG using Adaptive Temporal-Lateral Attention Scaling </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&query=Shi%2C+E">Enze Shi</a>, <a href="/search/eess?searchtype=author&query=Zhao%2C+K">Kui Zhao</a>, <a href="/search/eess?searchtype=author&query=Yuan%2C+Q">Qilong Yuan</a>, <a href="/search/eess?searchtype=author&query=Wang%2C+J">Jiaqi Wang</a>, <a href="/search/eess?searchtype=author&query=Hu%2C+H">Huawen Hu</a>, <a href="/search/eess?searchtype=author&query=Yu%2C+S">Sigang Yu</a>, <a 
href="/search/eess?searchtype=author&query=Zhang%2C+S">Shu Zhang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2409.12454v1-abstract-short" style="display: inline;"> Electroencephalography (EEG) is a vital tool to measure and record brain activity in neuroscience and clinical applications, yet its potential is constrained by signal heterogeneity, low signal-to-noise ratios, and limited labeled datasets. In this paper, we propose FoME (Foundation Model for EEG), a novel approach using adaptive temporal-lateral attention scaling to address above-mentioned challe… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2409.12454v1-abstract-full').style.display = 'inline'; document.getElementById('2409.12454v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2409.12454v1-abstract-full" style="display: none;"> Electroencephalography (EEG) is a vital tool to measure and record brain activity in neuroscience and clinical applications, yet its potential is constrained by signal heterogeneity, low signal-to-noise ratios, and limited labeled datasets. In this paper, we propose FoME (Foundation Model for EEG), a novel approach using adaptive temporal-lateral attention scaling to address above-mentioned challenges. FoME is pre-trained on a diverse 1.7TB dataset of scalp and intracranial EEG recordings, comprising 745M parameters trained for 1,096k steps. Our model introduces two key innovations: a time-frequency fusion embedding technique and an adaptive time-lateral attention scaling (ATLAS) mechanism. These components synergistically capture complex temporal and spectral EEG dynamics, enabling FoME to adapt to varying patterns across diverse data streams and facilitate robust multi-channel modeling. 
Evaluations across four downstream tasks demonstrate FoME's superior performance in classification and forecasting applications, consistently achieving state-of-the-art results. To conclude, FoME establishes a new paradigm for EEG analysis, offering a versatile foundation that advances brain-computer interfaces, clinical diagnostics, and cognitive research across neuroscience and related fields. Our code will be available at https://github.com/1061413241/FoME. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2409.12454v1-abstract-full').style.display = 'none'; document.getElementById('2409.12454v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 19 September, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> September 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2409.09525">arXiv:2409.09525</a> <span> [<a href="https://arxiv.org/pdf/2409.09525">pdf</a>, <a href="https://arxiv.org/format/2409.09525">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Information Theory">cs.IT</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Signal Processing">eess.SP</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computation">stat.CO</span> </div> </div> <p class="title is-5 mathjax"> Foundations of Vision-Based Localization: A New Approach to Localizability Analysis Using Stochastic Geometry </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&query=Hu%2C+H">Haozhou Hu</a>, <a href="/search/eess?searchtype=author&query=Dhillon%2C+H+S">Harpreet S. 
Dhillon</a>, <a href="/search/eess?searchtype=author&query=Buehrer%2C+R+M">R. Michael Buehrer</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2409.09525v1-abstract-short" style="display: inline;"> Despite significant algorithmic advances in vision-based positioning, a comprehensive probabilistic framework to study its performance has remained unexplored. The main objective of this paper is to develop such a framework using ideas from stochastic geometry. Due to limitations in sensor resolution, the level of detail in prior information, and computational resources, we may not be able to diff… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2409.09525v1-abstract-full').style.display = 'inline'; document.getElementById('2409.09525v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2409.09525v1-abstract-full" style="display: none;"> Despite significant algorithmic advances in vision-based positioning, a comprehensive probabilistic framework to study its performance has remained unexplored. The main objective of this paper is to develop such a framework using ideas from stochastic geometry. Due to limitations in sensor resolution, the level of detail in prior information, and computational resources, we may not be able to differentiate between landmarks with similar appearances in the vision data, such as trees, lampposts, and bus stops. While one cannot accurately determine the absolute target position using a single indistinguishable landmark, obtaining an approximate position fix is possible if the target can see multiple landmarks whose geometric placement on the map is unique. 
Modeling the locations of these indistinguishable landmarks as a Poisson point process (PPP) $Φ$ on $\mathbb{R}^2$, we develop a new approach to analyze the localizability in this setting. From the target location $\mathbb{x}$, the measurements are obtained from landmarks within the visibility region. These measurements, including ranges and angles to the landmarks, denoted as $f(\mathbb{x})$, can be treated as mappings from the target location. We are interested in understanding the probability that the measurements $f(\mathbb{x})$ are sufficiently distinct from the measurement $f(\mathbb{x}_0)$ at the given location, which we term localizability. Expressions of localizability probability are derived for specific vision-inspired measurements, such as ranges to landmarks and snapshots of their locations. Our analysis reveals that the localizability probability approaches one when the landmark intensity tends to infinity, which means that error-free localization is achievable in this limiting regime. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2409.09525v1-abstract-full').style.display = 'none'; document.getElementById('2409.09525v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 14 September, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> September 2024. 
</p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">23 pages, 7 figures</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2408.14156">arXiv:2408.14156</a> <span> [<a href="https://arxiv.org/pdf/2408.14156">pdf</a>, <a href="https://arxiv.org/format/2408.14156">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Signal Processing">eess.SP</span> </div> </div> <p class="title is-5 mathjax"> Integrated Sensing, Communication, and Powering over Multi-antenna OFDM Systems </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&query=Chen%2C+Y">Yilong Chen</a>, <a href="/search/eess?searchtype=author&query=Hu%2C+C">Chao Hu</a>, <a href="/search/eess?searchtype=author&query=Ren%2C+Z">Zixiang Ren</a>, <a href="/search/eess?searchtype=author&query=Hu%2C+H">Han Hu</a>, <a href="/search/eess?searchtype=author&query=Xu%2C+J">Jie Xu</a>, <a href="/search/eess?searchtype=author&query=Xu%2C+L">Lexi Xu</a>, <a href="/search/eess?searchtype=author&query=Liu%2C+L">Lei Liu</a>, <a href="/search/eess?searchtype=author&query=Cui%2C+S">Shuguang Cui</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2408.14156v1-abstract-short" style="display: inline;"> This paper considers a multi-functional orthogonal frequency division multiplexing (OFDM) system with integrated sensing, communication, and powering (ISCAP), in which a multi-antenna base station (BS) transmits OFDM signals to simultaneously deliver information to multiple information receivers (IRs), provide energy supply to multiple energy receivers (ERs), and sense potential targets based on t… <a 
class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2408.14156v1-abstract-full').style.display = 'inline'; document.getElementById('2408.14156v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2408.14156v1-abstract-full" style="display: none;"> This paper considers a multi-functional orthogonal frequency division multiplexing (OFDM) system with integrated sensing, communication, and powering (ISCAP), in which a multi-antenna base station (BS) transmits OFDM signals to simultaneously deliver information to multiple information receivers (IRs), provide energy supply to multiple energy receivers (ERs), and sense potential targets based on the echo signals. To facilitate ISCAP, the BS employs the joint transmit beamforming design by sending dedicated sensing/energy beams jointly with information beams. Furthermore, we consider the beam scanning for sensing, in which the joint beams scan in different directions over time to sense potential targets. In order to ensure the sensing beam scanning performance and meet the communication and powering requirements, it is essential to properly schedule IRs and ERs and design the resource allocation over time, frequency, and space. More specifically, we optimize the joint transmit beamforming over multiple OFDM symbols and subcarriers, with the objective of minimizing the average beampattern matching error of beam scanning for sensing, subject to the constraints on the average communication rates at IRs and the average harvested power at ERs. We find converged high-quality solutions to the formulated problem by proposing efficient iterative algorithms based on advanced optimization techniques. We also develop various heuristic designs based on the principles of zero-forcing (ZF) beamforming, round-robin user scheduling, and time switching, respectively. 
Numerical results show that our proposed algorithms adaptively generate information and sensing/energy beams at each time-frequency slot to match the scheduled IRs/ERs with the desired scanning beam, significantly outperforming the heuristic designs. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2408.14156v1-abstract-full').style.display = 'none'; document.getElementById('2408.14156v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 26 August, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> August 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">13 pages, 12 figures</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2408.02095">arXiv:2408.02095</a> <span> [<a href="https://arxiv.org/pdf/2408.02095">pdf</a>, <a href="https://arxiv.org/format/2408.02095">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Information Theory">cs.IT</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Signal Processing">eess.SP</span> </div> </div> <p class="title is-5 mathjax"> Secure Semantic Communications: From Perspective of Physical Layer Security </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&query=Li%2C+Y">Yongkang Li</a>, <a href="/search/eess?searchtype=author&query=Shi%2C+Z">Zheng Shi</a>, <a href="/search/eess?searchtype=author&query=Hu%2C+H">Han Hu</a>, <a href="/search/eess?searchtype=author&query=Fu%2C+Y">Yaru Fu</a>, <a href="/search/eess?searchtype=author&query=Wang%2C+H">Hong Wang</a>, <a 
href="/search/eess?searchtype=author&query=Lei%2C+H">Hongjiang Lei</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2408.02095v1-abstract-short" style="display: inline;"> Semantic communications have been envisioned as a potential technique that goes beyond Shannon paradigm. Unlike modern communications that provide bit-level security, the eaves-dropping of semantic communications poses a significant risk of potentially exposing intention of legitimate user. To address this challenge, a novel deep neural network (DNN) enabled secure semantic communication (DeepSSC)… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2408.02095v1-abstract-full').style.display = 'inline'; document.getElementById('2408.02095v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2408.02095v1-abstract-full" style="display: none;"> Semantic communications have been envisioned as a potential technique that goes beyond Shannon paradigm. Unlike modern communications that provide bit-level security, the eaves-dropping of semantic communications poses a significant risk of potentially exposing intention of legitimate user. To address this challenge, a novel deep neural network (DNN) enabled secure semantic communication (DeepSSC) system is developed by capitalizing on physical layer security. To balance the tradeoff between security and reliability, a two-phase training method for DNNs is devised. Particularly, Phase I aims at semantic recovery of legitimate user, while Phase II attempts to minimize the leakage of semantic information to eavesdroppers. The loss functions of DeepSSC in Phases I and II are respectively designed according to Shannon capacity and secure channel capacity, which are approximated with variational inference. 
Moreover, we define the metric of secure bilingual evaluation understudy (S-BLEU) to assess the security of semantic communications. Finally, simulation results demonstrate that DeepSSC achieves a significant boost to semantic security particularly in high signal-to-noise ratio regime, despite a minor degradation of reliability. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2408.02095v1-abstract-full').style.display = 'none'; document.getElementById('2408.02095v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 4 August, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> August 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2407.20532">arXiv:2407.20532</a> <span> [<a href="https://arxiv.org/pdf/2407.20532">pdf</a>, <a href="https://arxiv.org/format/2407.20532">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Systems and Control">eess.SY</span> </div> </div> <p class="title is-5 mathjax"> Scalable Synthesis of Formally Verified Neural Value Function for Hamilton-Jacobi Reachability Analysis </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&query=Yang%2C+Y">Yujie Yang</a>, <a href="/search/eess?searchtype=author&query=Hu%2C+H">Hanjiang Hu</a>, <a href="/search/eess?searchtype=author&query=Wei%2C+T">Tianhao Wei</a>, <a href="/search/eess?searchtype=author&query=Li%2C+S+E">Shengbo Eben Li</a>, <a href="/search/eess?searchtype=author&query=Liu%2C+C">Changliu Liu</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" 
id="2407.20532v2-abstract-short" style="display: inline;"> Hamilton-Jacobi (HJ) reachability analysis provides a formal method for guaranteeing safety in constrained control problems. It synthesizes a value function to represent a long-term safe set called feasible region. Early synthesis methods based on state space discretization cannot scale to high-dimensional problems, while recent methods that use neural networks to approximate value functions resul… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2407.20532v2-abstract-full').style.display = 'inline'; document.getElementById('2407.20532v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2407.20532v2-abstract-full" style="display: none;"> Hamilton-Jacobi (HJ) reachability analysis provides a formal method for guaranteeing safety in constrained control problems. It synthesizes a value function to represent a long-term safe set called feasible region. Early synthesis methods based on state space discretization cannot scale to high-dimensional problems, while recent methods that use neural networks to approximate value functions result in unverifiable feasible regions. To achieve both scalability and verifiability, we propose a framework for synthesizing verified neural value functions for HJ reachability analysis. Our framework consists of three stages: pre-training, adversarial training, and verification-guided training. We design three techniques to address three challenges to improve scalability respectively: boundary-guided backtracking (BGB) to improve counterexample search efficiency, entering state regularization (ESR) to enlarge feasible region, and activation pattern alignment (APA) to accelerate neural network verification. 
We also provide a neural safety certificate synthesis and verification benchmark called Cersyve-9, which includes nine commonly used safe control tasks and supplements existing neural network verification benchmarks. Our framework successfully synthesizes verified neural value functions on all tasks, and our proposed three techniques exhibit superior scalability and efficiency compared with existing methods. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2407.20532v2-abstract-full').style.display = 'none'; document.getElementById('2407.20532v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 31 July, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 30 July, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> July 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2407.08961">arXiv:2407.08961</a> <span> [<a href="https://arxiv.org/pdf/2407.08961">pdf</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Image and Video Processing">eess.IV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> Tissue-Contrastive Semi-Masked Autoencoders for Segmentation Pretraining on Chest CT </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&query=Zheng%2C+J">Jie Zheng</a>, <a href="/search/eess?searchtype=author&query=Wen%2C+R">Ru Wen</a>, <a href="/search/eess?searchtype=author&query=Hu%2C+H">Haiqin Hu</a>, <a href="/search/eess?searchtype=author&query=Wei%2C+L">Lina Wei</a>, <a 
href="/search/eess?searchtype=author&query=Su%2C+K">Kui Su</a>, <a href="/search/eess?searchtype=author&query=Chen%2C+W">Wei Chen</a>, <a href="/search/eess?searchtype=author&query=Liu%2C+C">Chen Liu</a>, <a href="/search/eess?searchtype=author&query=Wang%2C+J">Jun Wang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2407.08961v1-abstract-short" style="display: inline;"> Existing Masked Image Modeling (MIM) depends on a spatial patch-based masking-reconstruction strategy to perceive objects' features from unlabeled images, which may face two limitations when applied to chest CT: 1) inefficient feature learning due to complex anatomical details presented in CT images, and 2) suboptimal knowledge transfer owing to input disparity between upstream and downstream model… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2407.08961v1-abstract-full').style.display = 'inline'; document.getElementById('2407.08961v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2407.08961v1-abstract-full" style="display: none;"> Existing Masked Image Modeling (MIM) depends on a spatial patch-based masking-reconstruction strategy to perceive objects' features from unlabeled images, which may face two limitations when applied to chest CT: 1) inefficient feature learning due to complex anatomical details presented in CT images, and 2) suboptimal knowledge transfer owing to input disparity between upstream and downstream models. To address these issues, we propose a new MIM method named Tissue-Contrastive Semi-Masked Autoencoder (TCS-MAE) for modeling chest CT images. 
Our method has two novel designs: 1) a tissue-based masking-reconstruction strategy to capture more fine-grained anatomical features, and 2) a dual-AE architecture with contrastive learning between the masked and original image views to bridge the gap of the upstream and downstream models. To validate our method, we systematically investigate representative contrastive, generative, and hybrid self-supervised learning methods on top of tasks involving segmenting pneumonia, mediastinal tumors, and various organs. The results demonstrate that, compared to existing methods, our TCS-MAE more effectively learns tissue-aware representations, thereby significantly enhancing segmentation performance across all tasks. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2407.08961v1-abstract-full').style.display = 'none'; document.getElementById('2407.08961v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 11 July, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> July 2024. 
</p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2407.05407">arXiv:2407.05407</a> <span> [<a href="https://arxiv.org/pdf/2407.05407">pdf</a>, <a href="https://arxiv.org/format/2407.05407">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Sound">cs.SD</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Audio and Speech Processing">eess.AS</span> </div> </div> <p class="title is-5 mathjax"> CosyVoice: A Scalable Multilingual Zero-shot Text-to-speech Synthesizer based on Supervised Semantic Tokens </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&query=Du%2C+Z">Zhihao Du</a>, <a href="/search/eess?searchtype=author&query=Chen%2C+Q">Qian Chen</a>, <a href="/search/eess?searchtype=author&query=Zhang%2C+S">Shiliang Zhang</a>, <a href="/search/eess?searchtype=author&query=Hu%2C+K">Kai Hu</a>, <a href="/search/eess?searchtype=author&query=Lu%2C+H">Heng Lu</a>, <a href="/search/eess?searchtype=author&query=Yang%2C+Y">Yexin Yang</a>, <a href="/search/eess?searchtype=author&query=Hu%2C+H">Hangrui Hu</a>, <a href="/search/eess?searchtype=author&query=Zheng%2C+S">Siqi Zheng</a>, <a href="/search/eess?searchtype=author&query=Gu%2C+Y">Yue Gu</a>, <a href="/search/eess?searchtype=author&query=Ma%2C+Z">Ziyang Ma</a>, <a href="/search/eess?searchtype=author&query=Gao%2C+Z">Zhifu Gao</a>, <a href="/search/eess?searchtype=author&query=Yan%2C+Z">Zhijie Yan</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2407.05407v2-abstract-short" style="display: inline;"> Recent years have witnessed a trend that large 
language model (LLM) based text-to-speech (TTS) emerges into the mainstream due to their high naturalness and zero-shot capacity. In this paradigm, speech signals are discretized into token sequences, which are modeled by an LLM with text as prompts and reconstructed by a token-based vocoder to waveforms. Obviously, speech tokens play a critical role… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2407.05407v2-abstract-full').style.display = 'inline'; document.getElementById('2407.05407v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2407.05407v2-abstract-full" style="display: none;"> Recent years have witnessed a trend that large language model (LLM) based text-to-speech (TTS) emerges into the mainstream due to their high naturalness and zero-shot capacity. In this paradigm, speech signals are discretized into token sequences, which are modeled by an LLM with text as prompts and reconstructed by a token-based vocoder to waveforms. Obviously, speech tokens play a critical role in LLM-based TTS models. Current speech tokens are learned in an unsupervised manner, which lacks explicit semantic information and alignment to the text. In this paper, we propose to represent speech with supervised semantic tokens, which are derived from a multilingual speech recognition model by inserting vector quantization into the encoder. Based on the tokens, we further propose a scalable zero-shot TTS synthesizer, CosyVoice, which consists of an LLM for text-to-token generation and a conditional flow matching model for token-to-speech synthesis. Experimental results show that supervised semantic tokens significantly outperform existing unsupervised tokens in terms of content consistency and speaker similarity for zero-shot voice cloning. Moreover, we find that utilizing large-scale data further improves the synthesis performance, indicating the scalable capacity of CosyVoice. 
To the best of our knowledge, this is the first attempt to involve supervised speech tokens into TTS models. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2407.05407v2-abstract-full').style.display = 'none'; document.getElementById('2407.05407v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 9 July, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 7 July, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> July 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">work in progress. arXiv admin note: substantial text overlap with arXiv:2407.04051</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2407.04051">arXiv:2407.04051</a> <span> [<a href="https://arxiv.org/pdf/2407.04051">pdf</a>, <a href="https://arxiv.org/format/2407.04051">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Sound">cs.SD</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Audio and Speech Processing">eess.AS</span> </div> </div> <p class="title is-5 mathjax"> FunAudioLLM: Voice Understanding and Generation Foundation Models for Natural Interaction Between Humans and LLMs </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&query=An%2C+K">Keyu An</a>, <a href="/search/eess?searchtype=author&query=Chen%2C+Q">Qian Chen</a>, <a href="/search/eess?searchtype=author&query=Deng%2C+C">Chong Deng</a>, <a 
href="/search/eess?searchtype=author&query=Du%2C+Z">Zhihao Du</a>, <a href="/search/eess?searchtype=author&query=Gao%2C+C">Changfeng Gao</a>, <a href="/search/eess?searchtype=author&query=Gao%2C+Z">Zhifu Gao</a>, <a href="/search/eess?searchtype=author&query=Gu%2C+Y">Yue Gu</a>, <a href="/search/eess?searchtype=author&query=He%2C+T">Ting He</a>, <a href="/search/eess?searchtype=author&query=Hu%2C+H">Hangrui Hu</a>, <a href="/search/eess?searchtype=author&query=Hu%2C+K">Kai Hu</a>, <a href="/search/eess?searchtype=author&query=Ji%2C+S">Shengpeng Ji</a>, <a href="/search/eess?searchtype=author&query=Li%2C+Y">Yabin Li</a>, <a href="/search/eess?searchtype=author&query=Li%2C+Z">Zerui Li</a>, <a href="/search/eess?searchtype=author&query=Lu%2C+H">Heng Lu</a>, <a href="/search/eess?searchtype=author&query=Luo%2C+H">Haoneng Luo</a>, <a href="/search/eess?searchtype=author&query=Lv%2C+X">Xiang Lv</a>, <a href="/search/eess?searchtype=author&query=Ma%2C+B">Bin Ma</a>, <a href="/search/eess?searchtype=author&query=Ma%2C+Z">Ziyang Ma</a>, <a href="/search/eess?searchtype=author&query=Ni%2C+C">Chongjia Ni</a>, <a href="/search/eess?searchtype=author&query=Song%2C+C">Changhe Song</a>, <a href="/search/eess?searchtype=author&query=Shi%2C+J">Jiaqi Shi</a>, <a href="/search/eess?searchtype=author&query=Shi%2C+X">Xian Shi</a>, <a href="/search/eess?searchtype=author&query=Wang%2C+H">Hao Wang</a>, <a href="/search/eess?searchtype=author&query=Wang%2C+W">Wen Wang</a>, <a href="/search/eess?searchtype=author&query=Wang%2C+Y">Yuxuan Wang</a> , et al. (8 additional authors not shown) </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2407.04051v3-abstract-short" style="display: inline;"> This report introduces FunAudioLLM, a model family designed to enhance natural voice interactions between humans and large language models (LLMs). 
At its core are two innovative models: SenseVoice, which handles multilingual speech recognition, emotion recognition, and audio event detection; and CosyVoice, which facilitates natural speech generation with control over multiple languages, timbre, sp… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2407.04051v3-abstract-full').style.display = 'inline'; document.getElementById('2407.04051v3-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2407.04051v3-abstract-full" style="display: none;"> This report introduces FunAudioLLM, a model family designed to enhance natural voice interactions between humans and large language models (LLMs). At its core are two innovative models: SenseVoice, which handles multilingual speech recognition, emotion recognition, and audio event detection; and CosyVoice, which facilitates natural speech generation with control over multiple languages, timbre, speaking style, and speaker identity. SenseVoice-Small delivers exceptionally low-latency ASR for 5 languages, and SenseVoice-Large supports high-precision ASR for over 50 languages, while CosyVoice excels in multi-lingual voice generation, zero-shot in-context learning, cross-lingual voice cloning, and instruction-following capabilities. The models related to SenseVoice and CosyVoice have been open-sourced on Modelscope and Huggingface, along with the corresponding training, inference, and fine-tuning codes released on GitHub. By integrating these models with LLMs, FunAudioLLM enables applications such as speech-to-speech translation, emotional voice chat, interactive podcasts, and expressive audiobook narration, thereby pushing the boundaries of voice interaction technology. Demos are available at https://fun-audio-llm.github.io, and the code can be accessed at https://github.com/FunAudioLLM. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2407.04051v3-abstract-full').style.display = 'none'; document.getElementById('2407.04051v3-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 10 July, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 4 July, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> July 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Work in progress. Authors are listed in alphabetical order by family name</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2406.19608">arXiv:2406.19608</a> <span> [<a href="https://arxiv.org/pdf/2406.19608">pdf</a>, <a href="https://arxiv.org/format/2406.19608">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Systems and Control">eess.SY</span> </div> </div> <p class="title is-5 mathjax"> Multi-service collaboration and composition of cloud manufacturing customized production based on problem decomposition </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&query=Yue%2C+H">Hao Yue</a>, <a href="/search/eess?searchtype=author&query=Wu%2C+Y">Yingtao Wu</a>, <a href="/search/eess?searchtype=author&query=Wang%2C+M">Min Wang</a>, <a href="/search/eess?searchtype=author&query=Hu%2C+H">Hesuan Hu</a>, <a href="/search/eess?searchtype=author&query=Wu%2C+W">Weimin Wu</a>, <a href="/search/eess?searchtype=author&query=Zhang%2C+J">Jihui Zhang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span 
class="abstract-short has-text-grey-dark mathjax" id="2406.19608v1-abstract-short" style="display: inline;"> Cloud manufacturing system is a service-oriented and knowledge-based one, which can provide solutions for the large-scale customized production. The service resource allocation is the primary factor that restricts the production time and cost in the cloud manufacturing customized production (CMCP). In order to improve the efficiency and reduce the cost in CMCP, we propose a new framework which con… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2406.19608v1-abstract-full').style.display = 'inline'; document.getElementById('2406.19608v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2406.19608v1-abstract-full" style="display: none;"> Cloud manufacturing system is a service-oriented and knowledge-based one, which can provide solutions for the large-scale customized production. The service resource allocation is the primary factor that restricts the production time and cost in the cloud manufacturing customized production (CMCP). In order to improve the efficiency and reduce the cost in CMCP, we propose a new framework which considers the collaboration among services with the same functionality. A mathematical evaluation formulation for the service composition and service usage scheme is constructed with the following critical indexes: completion time, cost, and number of selected services. Subsequently, a problem decomposition based genetic algorithm is designed to obtain the optimal service compositions with service usage schemes. A smart clothing customization case is illustrated so as to show the effectiveness and efficiency of the method proposed in this paper. Finally, the results of simulation experiments and comparisons show that these solutions obtained by our method are with the minimum time, a lower cost, and the fewer selected services. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2406.19608v1-abstract-full').style.display = 'none'; document.getElementById('2406.19608v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 27 June, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> June 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">12 pages, 8 figures</span> </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">ACM Class:</span> J.0 </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2406.09810">arXiv:2406.09810</a> <span> [<a href="https://arxiv.org/pdf/2406.09810">pdf</a>, <a href="https://arxiv.org/format/2406.09810">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Robotics">cs.RO</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Systems and Control">eess.SY</span> </div> </div> <p class="title is-5 mathjax"> Think Deep and Fast: Learning Neural Nonlinear Opinion Dynamics from Inverse Dynamic Games for Split-Second Interactions </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&query=Hu%2C+H">Haimin Hu</a>, <a href="/search/eess?searchtype=author&query=DeCastro%2C+J">Jonathan DeCastro</a>, <a href="/search/eess?searchtype=author&query=Gopinath%2C+D">Deepak Gopinath</a>, <a href="/search/eess?searchtype=author&query=Rosman%2C+G">Guy Rosman</a>, <a href="/search/eess?searchtype=author&query=Leonard%2C+N+E">Naomi Ehrich Leonard</a>, <a href="/search/eess?searchtype=author&query=Fisac%2C+J+F">Jaime Fernández Fisac</a> 
</p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2406.09810v1-abstract-short" style="display: inline;"> Non-cooperative interactions commonly occur in multi-agent scenarios such as car racing, where an ego vehicle can choose to overtake the rival, or stay behind it until a safe overtaking "corridor" opens. While an expert human can do well at making such time-sensitive decisions, the development of safe and efficient game-theoretic trajectory planners capable of rapidly reasoning discrete options is… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2406.09810v1-abstract-full').style.display = 'inline'; document.getElementById('2406.09810v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2406.09810v1-abstract-full" style="display: none;"> Non-cooperative interactions commonly occur in multi-agent scenarios such as car racing, where an ego vehicle can choose to overtake the rival, or stay behind it until a safe overtaking "corridor" opens. While an expert human can do well at making such time-sensitive decisions, the development of safe and efficient game-theoretic trajectory planners capable of rapidly reasoning discrete options is yet to be fully addressed. The recently developed nonlinear opinion dynamics (NOD) show promise in enabling fast opinion formation and avoiding safety-critical deadlocks. However, it remains an open challenge to determine the model parameters of NOD automatically and adaptively, accounting for the ever-changing environment of interaction. In this work, we propose for the first time a learning-based, game-theoretic approach to synthesize a Neural NOD model from expert demonstrations, given as a dataset containing (possibly incomplete) state and action trajectories of interacting agents. 
The learned NOD can be used by existing dynamic game solvers to plan decisively while accounting for the predicted change of other agents' intents, thus enabling situational awareness in planning. We demonstrate Neural NOD's ability to make fast and robust decisions in a simulated autonomous racing example, leading to tangible improvements in safety and overtaking performance over state-of-the-art data-driven game-theoretic planning methods. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2406.09810v1-abstract-full').style.display = 'none'; document.getElementById('2406.09810v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 14 June, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> June 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2406.08038">arXiv:2406.08038</a> <span> [<a href="https://arxiv.org/pdf/2406.08038">pdf</a>, <a href="https://arxiv.org/format/2406.08038">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Signal Processing">eess.SP</span> </div> </div> <p class="title is-5 mathjax"> Interference Analysis for Coexistence of UAVs and Civil Aircrafts Based on Automatic Dependent Surveillance-Broadcast </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&query=Liao%2C+Y">Yiyang Liao</a>, <a href="/search/eess?searchtype=author&query=Jia%2C+Z">Ziye Jia</a>, <a href="/search/eess?searchtype=author&query=Dong%2C+C">Chao Dong</a>, <a href="/search/eess?searchtype=author&query=Zhang%2C+L">Lei Zhang</a>, <a href="/search/eess?searchtype=author&query=Wu%2C+Q">Qihui Wu</a>, <a href="/search/eess?searchtype=author&query=Hu%2C+H">Huiling Hu</a>, <a 
href="/search/eess?searchtype=author&query=Han%2C+Z">Zhu Han</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2406.08038v1-abstract-short" style="display: inline;"> Due to the advantages of high mobility and easy deployment, unmanned aerial vehicles (UAVs) are widely applied in both military and civilian fields. In order to strengthen the flight surveillance of UAVs and guarantee the airspace safety, UAVs can be equipped with the automatic dependent surveillance-broadcast (ADS-B) system, which periodically sends flight information to other aircrafts and groun… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2406.08038v1-abstract-full').style.display = 'inline'; document.getElementById('2406.08038v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2406.08038v1-abstract-full" style="display: none;"> Due to the advantages of high mobility and easy deployment, unmanned aerial vehicles (UAVs) are widely applied in both military and civilian fields. In order to strengthen the flight surveillance of UAVs and guarantee the airspace safety, UAVs can be equipped with the automatic dependent surveillance-broadcast (ADS-B) system, which periodically sends flight information to other aircrafts and ground stations (GSs). However, due to the limited resource of channel capacity, UAVs equipped with ADS-B results in the interference between UAVs and civil aircrafts (CAs), which further impacts the accuracy of received information at GSs. In detail, the channel capacity is mainly affected by the density of aircrafts and the transmitting power of ADS-B. 
Hence, based on the three-dimensional poisson point process, this work leverages the stochastic geometry theory to build a model of the coexistence of UAVs and CAs and analyze the interference performance of ADS-B monitoring system. From simulation results, we reveal the effects of transmitting power, density, threshold and pathloss on the performance of the ADS-B monitoring system. Besides, we provide the suggested transmitting power and density for the safe coexistence of UAVs and CAs. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2406.08038v1-abstract-full').style.display = 'none'; document.getElementById('2406.08038v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 12 June, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> June 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2405.13636">arXiv:2405.13636</a> <span> [<a href="https://arxiv.org/pdf/2405.13636">pdf</a>, <a href="https://arxiv.org/format/2405.13636">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Sound">cs.SD</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Audio and Speech Processing">eess.AS</span> </div> </div> <p class="title is-5 mathjax"> Audio Mamba: Pretrained Audio State Space Model For Audio Tagging </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&query=Lin%2C+J">Jiaju Lin</a>, <a href="/search/eess?searchtype=author&query=Hu%2C+H">Haoxuan Hu</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis 
has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2405.13636v1-abstract-short" style="display: inline;"> Audio tagging is an important task of mapping audio samples to their corresponding categories. Recently endeavours that exploit transformer models in this field have achieved great success. However, the quadratic self-attention cost limits the scaling of audio transformer models and further constrains the development of more universal audio models. In this paper, we attempt to solve this problem b… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2405.13636v1-abstract-full').style.display = 'inline'; document.getElementById('2405.13636v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2405.13636v1-abstract-full" style="display: none;"> Audio tagging is an important task of mapping audio samples to their corresponding categories. Recently endeavours that exploit transformer models in this field have achieved great success. However, the quadratic self-attention cost limits the scaling of audio transformer models and further constrains the development of more universal audio models. In this paper, we attempt to solve this problem by proposing Audio Mamba, a self-attention-free approach that captures long audio spectrogram dependency with state space models. Our experimental results on two audio-tagging datasets demonstrate the parameter efficiency of Audio Mamba, it achieves comparable results to SOTA audio spectrogram transformers with one third parameters. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2405.13636v1-abstract-full').style.display = 'none'; document.getElementById('2405.13636v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 22 May, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> May 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2405.07994">arXiv:2405.07994</a> <span> [<a href="https://arxiv.org/pdf/2405.07994">pdf</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Image and Video Processing">eess.IV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> BubbleID: A Deep Learning Framework for Bubble Interface Dynamics Analysis </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&query=Dunlap%2C+C">Christy Dunlap</a>, <a href="/search/eess?searchtype=author&query=Li%2C+C">Changgen Li</a>, <a href="/search/eess?searchtype=author&query=Pandey%2C+H">Hari Pandey</a>, <a href="/search/eess?searchtype=author&query=Le%2C+N">Ngan Le</a>, <a href="/search/eess?searchtype=author&query=Hu%2C+H">Han Hu</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2405.07994v1-abstract-short" style="display: inline;"> This paper presents BubbleID, a sophisticated deep 
learning architecture designed to comprehensively identify both static and dynamic attributes of bubbles within sequences of boiling images. By amalgamating segmentation powered by Mask R-CNN with SORT-based tracking techniques, the framework is capable of analyzing each bubble's location, dimensions, interface shape, and velocity over its lifetim… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2405.07994v1-abstract-full').style.display = 'inline'; document.getElementById('2405.07994v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2405.07994v1-abstract-full" style="display: none;"> This paper presents BubbleID, a sophisticated deep learning architecture designed to comprehensively identify both static and dynamic attributes of bubbles within sequences of boiling images. By amalgamating segmentation powered by Mask R-CNN with SORT-based tracking techniques, the framework is capable of analyzing each bubble's location, dimensions, interface shape, and velocity over its lifetime, and capturing dynamic events such as bubble departure. BubbleID is trained and tested on boiling images across diverse heater surfaces and operational settings. This paper also offers a comparative analysis of bubble interface dynamics prior to and post-critical heat flux (CHF) conditions. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2405.07994v1-abstract-full').style.display = 'none'; document.getElementById('2405.07994v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 20 March, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> May 2024. 
</p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">16 pages, 4 figures</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2404.13456">arXiv:2404.13456</a> <span> [<a href="https://arxiv.org/pdf/2404.13456">pdf</a>, <a href="https://arxiv.org/format/2404.13456">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Robotics">cs.RO</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Systems and Control">eess.SY</span> </div> </div> <p class="title is-5 mathjax"> Real-Time Safe Control of Neural Network Dynamic Models with Sound Approximation </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&query=Hu%2C+H">Hanjiang Hu</a>, <a href="/search/eess?searchtype=author&query=Lan%2C+J">Jianglin Lan</a>, <a href="/search/eess?searchtype=author&query=Liu%2C+C">Changliu Liu</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2404.13456v2-abstract-short" style="display: inline;"> Safe control of neural network dynamic models (NNDMs) is important to robotics and many applications. However, it remains challenging to compute an optimal safe control in real time for NNDM. To enable real-time computation, we propose to use a sound approximation of the NNDM in the control synthesis. 
In particular, we propose Bernstein over-approximated neural dynamics (BOND) based on the Bernste… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2404.13456v2-abstract-full').style.display = 'inline'; document.getElementById('2404.13456v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2404.13456v2-abstract-full" style="display: none;"> Safe control of neural network dynamic models (NNDMs) is important to robotics and many applications. However, it remains challenging to compute an optimal safe control in real time for NNDM. To enable real-time computation, we propose to use a sound approximation of the NNDM in the control synthesis. In particular, we propose Bernstein over-approximated neural dynamics (BOND) based on the Bernstein polynomial over-approximation (BPO) of ReLU activation functions in NNDM. To mitigate the errors introduced by the approximation and to ensure persistent feasibility of the safe control problems, we synthesize a worst-case safety index using the most unsafe approximated state within the BPO relaxation of NNDM offline. For the online real-time optimization, we formulate the first-order Taylor approximation of the nonlinear worst-case safety constraint as an additional linear layer of NNDM with the l2 bounded bias term for the higher-order remainder. Comprehensive experiments with different neural dynamics and safety constraints show that with safety guaranteed, our NNDMs with sound approximation are 10-100 times faster than the safe control baseline that uses mixed integer programming (MIP), validating the effectiveness of the worst-case safety index and scalability of the proposed BOND in real-time large-scale settings. The code is available at https://github.com/intelligent-control-lab/BOND. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2404.13456v2-abstract-full').style.display = 'none'; document.getElementById('2404.13456v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 20 May, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 20 April, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> April 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Camera-ready version of L4DC 2024, 12 pages, 3 figures, 4 tables</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2403.16361">arXiv:2403.16361</a> <span> [<a href="https://arxiv.org/pdf/2403.16361">pdf</a>, <a href="https://arxiv.org/format/2403.16361">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Image and Video Processing">eess.IV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> RSTAR4D: Rotational Streak Artifact Reduction in 4D CBCT using a Separable 4D CNN </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&query=Deng%2C+Z">Ziheng Deng</a>, <a href="/search/eess?searchtype=author&query=Chen%2C+H">Hua Chen</a>, <a href="/search/eess?searchtype=author&query=Zhou%2C+Y">Yongzheng Zhou</a>, <a href="/search/eess?searchtype=author&query=Hu%2C+H">Haibo Hu</a>, <a href="/search/eess?searchtype=author&query=Xu%2C+Z">Zhiyong Xu</a>, <a href="/search/eess?searchtype=author&query=Sun%2C+J">Jiayuan Sun</a>, <a 
href="/search/eess?searchtype=author&query=Lyu%2C+T">Tianling Lyu</a>, <a href="/search/eess?searchtype=author&query=Xi%2C+Y">Yan Xi</a>, <a href="/search/eess?searchtype=author&query=Chen%2C+Y">Yang Chen</a>, <a href="/search/eess?searchtype=author&query=Zhao%2C+J">Jun Zhao</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2403.16361v4-abstract-short" style="display: inline;"> Four-dimensional cone-beam computed tomography (4D CBCT) provides respiration-resolved images and can be used for image-guided radiation therapy. However, the ability to reveal respiratory motion comes at the cost of image artifacts. As raw projection data are sorted into multiple respiratory phases, the cone-beam projections become much sparser and the reconstructed 4D CBCT images will be covered… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2403.16361v4-abstract-full').style.display = 'inline'; document.getElementById('2403.16361v4-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2403.16361v4-abstract-full" style="display: none;"> Four-dimensional cone-beam computed tomography (4D CBCT) provides respiration-resolved images and can be used for image-guided radiation therapy. However, the ability to reveal respiratory motion comes at the cost of image artifacts. As raw projection data are sorted into multiple respiratory phases, the cone-beam projections become much sparser and the reconstructed 4D CBCT images will be covered by severe streak artifacts. Although several deep learning-based methods have been proposed to address this issue, most algorithms employ 2D network models as backbones, neglecting the intrinsic structural priors within 4D CBCT images. In this paper, we first explore the origin and appearance of streak artifacts in 4D CBCT images. 
We find that streak artifacts exhibit a unique rotational motion along with the patient's respiration, distinguishable from diaphragm-driven respiratory motion in the spatiotemporal domain. Therefore, we propose a novel 4D neural network model, RSTAR4D-Net, designed to address Rotational STreak Artifact Reduction by integrating the spatial and temporal information within 4D CBCT images. Specifically, we overcome the computational and training difficulties of a 4D neural network. The specially designed model adopts an efficient implementation of 4D convolutions to reduce computational costs and thus can process the whole 4D image in one pass. Additionally, a Tetris training strategy pertinent to the separable 4D convolutions is proposed to effectively train the model using limited 4D training samples. Extensive experiments substantiate the effectiveness of our proposed method, and the RSTAR4D-Net shows superior performance compared to other methods. The source code and dynamic demos are available at https://github.com/ivy9092111111/RSTAR. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2403.16361v4-abstract-full').style.display = 'none'; document.getElementById('2403.16361v4-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 29 September, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 24 March, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> March 2024. 
</p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2402.18070">arXiv:2402.18070</a> <span> [<a href="https://arxiv.org/pdf/2402.18070">pdf</a>, <a href="https://arxiv.org/format/2402.18070">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Hardware Architecture">cs.AR</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Signal Processing">eess.SP</span> </div> </div> <p class="title is-5 mathjax"> A Hierarchical Dataflow-Driven Heterogeneous Architecture for Wireless Baseband Processing </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&query=Jiang%2C+L">Limin Jiang</a>, <a href="/search/eess?searchtype=author&query=Shi%2C+Y">Yi Shi</a>, <a href="/search/eess?searchtype=author&query=Hu%2C+H">Haiqin Hu</a>, <a href="/search/eess?searchtype=author&query=Deng%2C+Q">Qingyu Deng</a>, <a href="/search/eess?searchtype=author&query=Xu%2C+S">Siyi Xu</a>, <a href="/search/eess?searchtype=author&query=Liu%2C+Y">Yintao Liu</a>, <a href="/search/eess?searchtype=author&query=Yuan%2C+F">Feng Yuan</a>, <a href="/search/eess?searchtype=author&query=Wang%2C+S">Si Wang</a>, <a href="/search/eess?searchtype=author&query=Shen%2C+Y">Yihao Shen</a>, <a href="/search/eess?searchtype=author&query=Ye%2C+F">Fangfang Ye</a>, <a href="/search/eess?searchtype=author&query=Cao%2C+S">Shan Cao</a>, <a href="/search/eess?searchtype=author&query=Jiang%2C+Z">Zhiyuan Jiang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2402.18070v1-abstract-short" style="display: inline;"> Wireless baseband processing (WBP) is a key element of wireless communications, with a series of signal processing modules to improve data throughput and counter 
channel fading. Conventional hardware solutions, such as digital signal processors (DSPs) and more recently, graphic processing units (GPUs), provide various degrees of parallelism, yet they both fail to take into account the cyclical and… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2402.18070v1-abstract-full').style.display = 'inline'; document.getElementById('2402.18070v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2402.18070v1-abstract-full" style="display: none;"> Wireless baseband processing (WBP) is a key element of wireless communications, with a series of signal processing modules to improve data throughput and counter channel fading. Conventional hardware solutions, such as digital signal processors (DSPs) and more recently, graphic processing units (GPUs), provide various degrees of parallelism, yet they both fail to take into account the cyclical and consecutive character of WBP. Furthermore, the large amount of data in WBPs cannot be processed quickly in symmetric multiprocessors (SMPs) due to the unpredictability of memory latency. To address this issue, we propose a hierarchical dataflow-driven architecture to accelerate WBP. A pack-and-ship approach is presented under a non-uniform memory access (NUMA) architecture to allow the subordinate tiles to operate in a bundled access and execute manner. We also propose a multi-level dataflow model and the related scheduling scheme to manage and allocate the heterogeneous hardware resources. Experiment results demonstrate that our prototype achieves $2\times$ and $2.3\times$ speedup in terms of normalized throughput and single-tile clock cycles compared with GPU and DSP counterparts in several critical WBP benchmarks. Additionally, a link-level throughput of $288$ Mbps can be achieved with a $45$-core configuration. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2402.18070v1-abstract-full').style.display = 'none'; document.getElementById('2402.18070v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 28 February, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">7 pages, 7 figures, conference</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2402.14174">arXiv:2402.14174</a> <span> [<a href="https://arxiv.org/pdf/2402.14174">pdf</a>, <a href="https://arxiv.org/format/2402.14174">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Robotics">cs.RO</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Systems and Control">eess.SY</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Optimization and Control">math.OC</span> </div> </div> <p class="title is-5 mathjax"> Blending Data-Driven Priors in Dynamic Games </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&query=Lidard%2C+J">Justin Lidard</a>, <a href="/search/eess?searchtype=author&query=Hu%2C+H">Haimin Hu</a>, <a href="/search/eess?searchtype=author&query=Hancock%2C+A">Asher Hancock</a>, <a href="/search/eess?searchtype=author&query=Zhang%2C+Z">Zixu Zhang</a>, <a href="/search/eess?searchtype=author&query=Contreras%2C+A+G">Albert Gimó Contreras</a>, <a 
href="/search/eess?searchtype=author&query=Modi%2C+V">Vikash Modi</a>, <a href="/search/eess?searchtype=author&query=DeCastro%2C+J">Jonathan DeCastro</a>, <a href="/search/eess?searchtype=author&query=Gopinath%2C+D">Deepak Gopinath</a>, <a href="/search/eess?searchtype=author&query=Rosman%2C+G">Guy Rosman</a>, <a href="/search/eess?searchtype=author&query=Leonard%2C+N+E">Naomi Ehrich Leonard</a>, <a href="/search/eess?searchtype=author&query=Santos%2C+M">María Santos</a>, <a href="/search/eess?searchtype=author&query=Fisac%2C+J+F">Jaime Fernández Fisac</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2402.14174v3-abstract-short" style="display: inline;"> As intelligent robots like autonomous vehicles become increasingly deployed in the presence of people, the extent to which these systems should leverage model-based game-theoretic planners versus data-driven policies for safe, interaction-aware motion planning remains an open question. Existing dynamic game formulations assume all agents are task-driven and behave optimally. However, in reality, h… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2402.14174v3-abstract-full').style.display = 'inline'; document.getElementById('2402.14174v3-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2402.14174v3-abstract-full" style="display: none;"> As intelligent robots like autonomous vehicles become increasingly deployed in the presence of people, the extent to which these systems should leverage model-based game-theoretic planners versus data-driven policies for safe, interaction-aware motion planning remains an open question. Existing dynamic game formulations assume all agents are task-driven and behave optimally. 
However, in reality, humans tend to deviate from the decisions prescribed by these models, and their behavior is better approximated under a noisy-rational paradigm. In this work, we investigate a principled methodology to blend a data-driven reference policy with an optimization-based game-theoretic policy. We formulate KLGame, an algorithm for solving non-cooperative dynamic game with Kullback-Leibler (KL) regularization with respect to a general, stochastic, and possibly multi-modal reference policy. Our method incorporates, for each decision maker, a tunable parameter that permits modulation between task-driven and data-driven behaviors. We propose an efficient algorithm for computing multi-modal approximate feedback Nash equilibrium strategies of KLGame in real time. Through a series of simulated and real-world autonomous driving scenarios, we demonstrate that KLGame policies can more effectively incorporate guidance from the reference policy and account for noisily-rational human behaviors versus non-regularized baselines. Website with additional information, videos, and code: https://kl-games.github.io/. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2402.14174v3-abstract-full').style.display = 'none'; document.getElementById('2402.14174v3-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 6 July, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 21 February, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2024. 
</p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">20 pages, 12 figures</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2402.09246">arXiv:2402.09246</a> <span> [<a href="https://arxiv.org/pdf/2402.09246">pdf</a>, <a href="https://arxiv.org/format/2402.09246">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Robotics">cs.RO</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Systems and Control">eess.SY</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Optimization and Control">math.OC</span> </div> </div> <p class="title is-5 mathjax"> Who Plays First? Optimizing the Order of Play in Stackelberg Games with Many Robots </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&query=Hu%2C+H">Haimin Hu</a>, <a href="/search/eess?searchtype=author&query=Dragotto%2C+G">Gabriele Dragotto</a>, <a href="/search/eess?searchtype=author&query=Zhang%2C+Z">Zixu Zhang</a>, <a href="/search/eess?searchtype=author&query=Liang%2C+K">Kaiqu Liang</a>, <a href="/search/eess?searchtype=author&query=Stellato%2C+B">Bartolomeo Stellato</a>, <a href="/search/eess?searchtype=author&query=Fisac%2C+J+F">Jaime F. 
Fisac</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2402.09246v4-abstract-short" style="display: inline;"> We consider the multi-agent spatial navigation problem of computing the socially optimal order of play, i.e., the sequence in which the agents commit to their decisions, and its associated equilibrium in an N-player Stackelberg trajectory game. We model this problem as a mixed-integer optimization problem over the space of all possible Stackelberg games associated with the order of play's permutat… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2402.09246v4-abstract-full').style.display = 'inline'; document.getElementById('2402.09246v4-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2402.09246v4-abstract-full" style="display: none;"> We consider the multi-agent spatial navigation problem of computing the socially optimal order of play, i.e., the sequence in which the agents commit to their decisions, and its associated equilibrium in an N-player Stackelberg trajectory game. We model this problem as a mixed-integer optimization problem over the space of all possible Stackelberg games associated with the order of play's permutations. To solve the problem, we introduce Branch and Play (B&P), an efficient and exact algorithm that provably converges to a socially optimal order of play and its Stackelberg equilibrium. As a subroutine for B&P, we employ and extend sequential trajectory planning, i.e., a popular multi-agent control approach, to scalably compute valid local Stackelberg equilibria for any given order of play. We demonstrate the practical utility of B&P to coordinate air traffic control, swarm formation, and delivery vehicle fleets. 
We find that B&P consistently outperforms various baselines, and computes the socially optimal equilibrium. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2402.09246v4-abstract-full').style.display = 'none'; document.getElementById('2402.09246v4-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 24 June, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 14 February, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Robotics: Science and Systems (RSS) 2024</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2401.13766">arXiv:2401.13766</a> <span> [<a href="https://arxiv.org/pdf/2401.13766">pdf</a>, <a href="https://arxiv.org/ps/2401.13766">ps</a>, <a href="https://arxiv.org/format/2401.13766">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Audio and Speech Processing">eess.AS</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Sound">cs.SD</span> </div> </div> <p class="title is-5 mathjax"> Bayesian adaptive learning to latent variables via Variational Bayes and Maximum a Posteriori </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&query=Hu%2C+H">Hu Hu</a>, <a href="/search/eess?searchtype=author&query=Siniscalchi%2C+S+M">Sabato Marco Siniscalchi</a>, <a href="/search/eess?searchtype=author&query=Lee%2C+C">Chin-Hui Lee</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: 
<span class="abstract-short has-text-grey-dark mathjax" id="2401.13766v1-abstract-short" style="display: inline;"> In this work, we aim to establish a Bayesian adaptive learning framework by focusing on estimating latent variables in deep neural network (DNN) models. Latent variables indeed encode both transferable distributional information and structural relationships. Thus the distributions of the source latent variables (prior) can be combined with the knowledge learned from the target data (likelihood) to… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2401.13766v1-abstract-full').style.display = 'inline'; document.getElementById('2401.13766v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2401.13766v1-abstract-full" style="display: none;"> In this work, we aim to establish a Bayesian adaptive learning framework by focusing on estimating latent variables in deep neural network (DNN) models. Latent variables indeed encode both transferable distributional information and structural relationships. Thus the distributions of the source latent variables (prior) can be combined with the knowledge learned from the target data (likelihood) to yield the distributions of the target latent variables (posterior) with the goal of addressing acoustic mismatches between training and testing conditions. The prior knowledge transfer is accomplished through Variational Bayes (VB). In addition, we also investigate Maximum a Posteriori (MAP) based Bayesian adaptation. Experimental results on device adaptation in acoustic scene classification show that our proposed approaches can obtain good improvements on target devices, and consistently outperforms other cut-edging algorithms. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2401.13766v1-abstract-full').style.display = 'none'; document.getElementById('2401.13766v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 24 January, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> January 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">ASRU2023 Bayesian Symposium. arXiv admin note: text overlap with arXiv:2110.08598</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2401.09455">arXiv:2401.09455</a> <span> [<a href="https://arxiv.org/pdf/2401.09455">pdf</a>, <a href="https://arxiv.org/format/2401.09455">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Networking and Internet Architecture">cs.NI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Systems and Control">eess.SY</span> </div> </div> <p class="title is-5 mathjax"> Dynamic Routing for Integrated Satellite-Terrestrial Networks: A Constrained Multi-Agent Reinforcement Learning Approach </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&query=Lyu%2C+Y">Yifeng Lyu</a>, <a href="/search/eess?searchtype=author&query=Hu%2C+H">Han Hu</a>, <a href="/search/eess?searchtype=author&query=Fan%2C+R">Rongfei Fan</a>, <a href="/search/eess?searchtype=author&query=Liu%2C+Z">Zhi Liu</a>, <a 
href="/search/eess?searchtype=author&query=An%2C+J">Jianping An</a>, <a href="/search/eess?searchtype=author&query=Mao%2C+S">Shiwen Mao</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2401.09455v1-abstract-short" style="display: inline;"> The integrated satellite-terrestrial network (ISTN) system has experienced significant growth, offering seamless communication services in remote areas with limited terrestrial infrastructure. However, designing a routing scheme for ISTN is exceedingly difficult, primarily due to the heightened complexity resulting from the inclusion of additional ground stations, along with the requirement to sat… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2401.09455v1-abstract-full').style.display = 'inline'; document.getElementById('2401.09455v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2401.09455v1-abstract-full" style="display: none;"> The integrated satellite-terrestrial network (ISTN) system has experienced significant growth, offering seamless communication services in remote areas with limited terrestrial infrastructure. However, designing a routing scheme for ISTN is exceedingly difficult, primarily due to the heightened complexity resulting from the inclusion of additional ground stations, along with the requirement to satisfy various constraints related to satellite service quality. To address these challenges, we study packet routing with ground stations and satellites working jointly to transmit packets, while prioritizing fast communication and meeting energy efficiency and packet loss requirements. Specifically, we formulate the problem of packet routing with constraints as a max-min problem using the Lagrange method. 
Then we propose a novel constrained Multi-Agent reinforcement learning (MARL) dynamic routing algorithm named CMADR, which efficiently balances objective improvement and constraint satisfaction during the updating of policy and Lagrange multipliers. Finally, we conduct extensive experiments and an ablation study using the OneWeb and Telesat mega-constellations. Results demonstrate that CMADR reduces the packet delay by a minimum of 21% and 15%, while meeting stringent energy consumption and packet loss rate constraints, outperforming several baseline algorithms. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2401.09455v1-abstract-full').style.display = 'none'; document.getElementById('2401.09455v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 22 December, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> January 2024. 
</p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2401.03664">arXiv:2401.03664</a> <span> [<a href="https://arxiv.org/pdf/2401.03664">pdf</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Image and Video Processing">eess.IV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> Dual-Channel Reliable Breast Ultrasound Image Classification Based on Explainable Attribution and Uncertainty Quantification </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&query=Lei%2C+S">Shuge Lei</a>, <a href="/search/eess?searchtype=author&query=Hu%2C+H">Haonan Hu</a>, <a href="/search/eess?searchtype=author&query=Sun%2C+D">Dasheng Sun</a>, <a href="/search/eess?searchtype=author&query=Zhang%2C+H">Huabin Zhang</a>, <a href="/search/eess?searchtype=author&query=Yuan%2C+K">Kehong Yuan</a>, <a href="/search/eess?searchtype=author&query=Dai%2C+J">Jian Dai</a>, <a href="/search/eess?searchtype=author&query=Tang%2C+J">Jijun Tang</a>, <a href="/search/eess?searchtype=author&query=Tong%2C+Y">Yan Tong</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2401.03664v1-abstract-short" style="display: inline;"> This paper focuses on the classification task of breast ultrasound images and researches on the reliability measurement of classification results. We proposed a dual-channel evaluation framework based on the proposed inference reliability and predictive reliability scores. 
For the inference reliability evaluation, human-aligned and doctor-agreed inference rationales based on the improved feature a… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2401.03664v1-abstract-full').style.display = 'inline'; document.getElementById('2401.03664v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2401.03664v1-abstract-full" style="display: none;"> This paper focuses on the classification task of breast ultrasound images and researches on the reliability measurement of classification results. We proposed a dual-channel evaluation framework based on the proposed inference reliability and predictive reliability scores. For the inference reliability evaluation, human-aligned and doctor-agreed inference rationales based on the improved feature attribution algorithm SP-RISA are gracefully applied. Uncertainty quantification is used to evaluate the predictive reliability via the Test Time Enhancement. The effectiveness of this reliability evaluation framework has been verified on our breast ultrasound clinical dataset YBUS, and its robustness is verified on the public dataset BUSI. The expected calibration errors on both datasets are significantly lower than traditional evaluation methods, which proves the effectiveness of our proposed reliability measurement. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2401.03664v1-abstract-full').style.display = 'none'; document.getElementById('2401.03664v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 7 January, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> January 2024. 
</p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2312.15721">arXiv:2312.15721</a> <span> [<a href="https://arxiv.org/pdf/2312.15721">pdf</a>, <a href="https://arxiv.org/ps/2312.15721">ps</a>, <a href="https://arxiv.org/format/2312.15721">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Systems and Control">eess.SY</span> </div> </div> <p class="title is-5 mathjax"> UAV Trajectory Tracking via RNN-enhanced IMM-KF with ADS-B Data </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&query=Zhu%2C+Y">Yian Zhu</a>, <a href="/search/eess?searchtype=author&query=Jia%2C+Z">Ziye Jia</a>, <a href="/search/eess?searchtype=author&query=Wu%2C+Q">Qihui Wu</a>, <a href="/search/eess?searchtype=author&query=Dong%2C+C">Chao Dong</a>, <a href="/search/eess?searchtype=author&query=Zhuang%2C+Z">Zirui Zhuang</a>, <a href="/search/eess?searchtype=author&query=Hu%2C+H">Huiling Hu</a>, <a href="/search/eess?searchtype=author&query=Cai%2C+Q">Qi Cai</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2312.15721v1-abstract-short" style="display: inline;"> With the increasing use of autonomous unmanned aerial vehicles (UAVs), it is critical to ensure that they are continuously tracked and controlled, especially when UAVs operate beyond the communication range of ground stations (GSs). Conventional surveillance methods for UAVs, such as satellite communications, ground mobile networks and radars are subject to high costs and latency. 
The automatic de… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2312.15721v1-abstract-full').style.display = 'inline'; document.getElementById('2312.15721v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2312.15721v1-abstract-full" style="display: none;"> With the increasing use of autonomous unmanned aerial vehicles (UAVs), it is critical to ensure that they are continuously tracked and controlled, especially when UAVs operate beyond the communication range of ground stations (GSs). Conventional surveillance methods for UAVs, such as satellite communications, ground mobile networks and radars are subject to high costs and latency. The automatic dependent surveillance-broadcast (ADS-B) emerges as a promising method to monitor UAVs, due to the advantages of real-time capabilities, easy deployment and affordable cost. Therefore, we employ the ADS-B for UAV trajectory tracking in this work. However, the inherent noise in the transmitted data poses an obstacle for precisely tracking UAVs. Hence, we propose the algorithm of recurrent neural network-enhanced interacting multiple model-Kalman filter (RNN-enhanced IMM-KF) for UAV trajectory filtering. Specifically, the algorithm utilizes the RNN to capture the maneuvering behavior of UAVs and the noise level in the ADS-B data. Moreover, accurate UAV tracking is achieved by adaptively adjusting the process noise matrix and observation noise matrix of IMM-KF with the assistance of the RNN. The proposed algorithm can facilitate GSs to make timely decisions during trajectory deviations of UAVs and improve the airspace safety. Finally, via comprehensive simulations, the total root mean square error of the proposed algorithm decreases by 28.56%, compared to the traditional IMM-KF. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2312.15721v1-abstract-full').style.display = 'none'; document.getElementById('2312.15721v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 25 December, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> December 2023. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2312.14563">arXiv:2312.14563</a> <span> [<a href="https://arxiv.org/pdf/2312.14563">pdf</a>, <a href="https://arxiv.org/format/2312.14563">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Signal Processing">eess.SP</span> </div> </div> <p class="title is-5 mathjax"> AI Generated Signal for Wireless Sensing </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&query=He%2C+H">Hanxiang He</a>, <a href="/search/eess?searchtype=author&query=Hu%2C+H">Han Hu</a>, <a href="/search/eess?searchtype=author&query=Huan%2C+X">Xintao Huan</a>, <a href="/search/eess?searchtype=author&query=Liu%2C+H">Heng Liu</a>, <a href="/search/eess?searchtype=author&query=An%2C+J">Jianping An</a>, <a href="/search/eess?searchtype=author&query=Mao%2C+S">Shiwen Mao</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2312.14563v1-abstract-short" style="display: inline;"> Deep learning has significantly advanced wireless sensing technology by leveraging substantial amounts of high-quality training data. 
However, collecting wireless sensing data encounters diverse challenges, including unavoidable data noise, limited data scale due to significant collection overhead, and the necessity to reacquire data in new environments. Taking inspiration from the achievements of… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2312.14563v1-abstract-full').style.display = 'inline'; document.getElementById('2312.14563v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2312.14563v1-abstract-full" style="display: none;"> Deep learning has significantly advanced wireless sensing technology by leveraging substantial amounts of high-quality training data. However, collecting wireless sensing data encounters diverse challenges, including unavoidable data noise, limited data scale due to significant collection overhead, and the necessity to reacquire data in new environments. Taking inspiration from the achievements of AI-generated content, this paper introduces a signal generation method that achieves data denoising, augmentation, and synthesis by disentangling distinct attributes within the signal, such as individual and environment. The approach encompasses two pivotal modules: structured signal selection and signal disentanglement generation. Structured signal selection establishes a minimal signal set with the target attributes for subsequent attribute disentanglement. Signal disentanglement generation disentangles the target attributes and reassembles them to generate novel signals. Extensive experimental results demonstrate that the proposed method can generate data that closely resembles real-world data on two wireless sensing datasets, exhibiting state-of-the-art performance. Our approach presents a robust framework for comprehending and manipulating attribute-specific information in wireless sensing. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2312.14563v1-abstract-full').style.display = 'none'; document.getElementById('2312.14563v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 22 December, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> December 2023. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">6 pages, 6 figures, published to Globecom2023</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2312.04786">arXiv:2312.04786</a> <span> [<a href="https://arxiv.org/pdf/2312.04786">pdf</a>, <a href="https://arxiv.org/format/2312.04786">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Information Theory">cs.IT</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Signal Processing">eess.SP</span> </div> </div> <p class="title is-5 mathjax"> Joint User Association, Interference Cancellation and Power Control for Multi-IRS Assisted UAV Communications </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&query=Ning%2C+Z">Zhaolong Ning</a>, <a href="/search/eess?searchtype=author&query=Hu%2C+H">Hao Hu</a>, <a href="/search/eess?searchtype=author&query=Wang%2C+X">Xiaojie Wang</a>, <a href="/search/eess?searchtype=author&query=Wu%2C+Q">Qingqing Wu</a>, <a href="/search/eess?searchtype=author&query=Yuen%2C+C">Chau Yuen</a>, <a href="/search/eess?searchtype=author&query=Yu%2C+F+R">F. 
Richard Yu</a>, <a href="/search/eess?searchtype=author&query=Zhang%2C+Y">Yan Zhang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2312.04786v1-abstract-short" style="display: inline;"> Intelligent reflecting surface (IRS)-assisted unmanned aerial vehicle (UAV) communications are expected to alleviate the load of ground base stations in a cost-effective way. Existing studies mainly focus on the deployment and resource allocation of a single IRS instead of multiple IRSs, whereas it is extremely challenging for joint multi-IRS multi-user association in UAV communications with const… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2312.04786v1-abstract-full').style.display = 'inline'; document.getElementById('2312.04786v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2312.04786v1-abstract-full" style="display: none;"> Intelligent reflecting surface (IRS)-assisted unmanned aerial vehicle (UAV) communications are expected to alleviate the load of ground base stations in a cost-effective way. Existing studies mainly focus on the deployment and resource allocation of a single IRS instead of multiple IRSs, whereas it is extremely challenging for joint multi-IRS multi-user association in UAV communications with constrained reflecting resources and dynamic scenarios. To address the aforementioned challenges, we propose a new optimization algorithm for joint IRS-user association, trajectory optimization of UAVs, successive interference cancellation (SIC) decoding order scheduling and power allocation to maximize system energy efficiency. We first propose an inverse soft-Q learning-based algorithm to optimize multi-IRS multi-user association. 
Then, SCA and Dinkelbach-based algorithm are leveraged to optimize UAV trajectory followed by the optimization of SIC decoding order scheduling and power allocation. Finally, theoretical analysis and performance results show significant advantages of the designed algorithm in convergence rate and energy efficiency. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2312.04786v1-abstract-full').style.display = 'none'; document.getElementById('2312.04786v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 7 December, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> December 2023. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2311.00567">arXiv:2311.00567</a> <span> [<a href="https://arxiv.org/pdf/2311.00567">pdf</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Image and Video Processing">eess.IV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Medical Physics">physics.med-ph</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Quantitative Methods">q-bio.QM</span> </div> </div> <p class="title is-5 mathjax"> A Robust Deep Learning Method with Uncertainty Estimation for the Pathological Classification of Renal Cell Carcinoma based on CT Images </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&query=Yao%2C+N">Ni Yao</a>, <a href="/search/eess?searchtype=author&query=Hu%2C+H">Hang Hu</a>, 
<a href="/search/eess?searchtype=author&query=Chen%2C+K">Kaicong Chen</a>, <a href="/search/eess?searchtype=author&query=Zhao%2C+C">Chen Zhao</a>, <a href="/search/eess?searchtype=author&query=Guo%2C+Y">Yuan Guo</a>, <a href="/search/eess?searchtype=author&query=Li%2C+B">Boya Li</a>, <a href="/search/eess?searchtype=author&query=Nan%2C+J">Jiaofen Nan</a>, <a href="/search/eess?searchtype=author&query=Li%2C+Y">Yanting Li</a>, <a href="/search/eess?searchtype=author&query=Han%2C+C">Chuang Han</a>, <a href="/search/eess?searchtype=author&query=Zhu%2C+F">Fubao Zhu</a>, <a href="/search/eess?searchtype=author&query=Zhou%2C+W">Weihua Zhou</a>, <a href="/search/eess?searchtype=author&query=Tian%2C+L">Li Tian</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2311.00567v2-abstract-short" style="display: inline;"> Objectives To develop and validate a deep learning-based diagnostic model incorporating uncertainty estimation so as to facilitate radiologists in the preoperative differentiation of the pathological subtypes of renal cell carcinoma (RCC) based on CT images. Methods Data from 668 consecutive patients, pathologically proven RCC, were retrospectively collected from Center 1. By using five-fold cross… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2311.00567v2-abstract-full').style.display = 'inline'; document.getElementById('2311.00567v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2311.00567v2-abstract-full" style="display: none;"> Objectives To develop and validate a deep learning-based diagnostic model incorporating uncertainty estimation so as to facilitate radiologists in the preoperative differentiation of the pathological subtypes of renal cell carcinoma (RCC) based on CT images. 
Methods Data from 668 consecutive patients, pathologically proven RCC, were retrospectively collected from Center 1. By using five-fold cross-validation, a deep learning model incorporating uncertainty estimation was developed to classify RCC subtypes into clear cell RCC (ccRCC), papillary RCC (pRCC), and chromophobe RCC (chRCC). An external validation set of 78 patients from Center 2 further evaluated the model's performance. Results In the five-fold cross-validation, the model's area under the receiver operating characteristic curve (AUC) for the classification of ccRCC, pRCC, and chRCC was 0.868 (95% CI: 0.826-0.923), 0.846 (95% CI: 0.812-0.886), and 0.839 (95% CI: 0.802-0.88), respectively. In the external validation set, the AUCs were 0.856 (95% CI: 0.838-0.882), 0.787 (95% CI: 0.757-0.818), and 0.793 (95% CI: 0.758-0.831) for ccRCC, pRCC, and chRCC, respectively. Conclusions The developed deep learning model demonstrated robust performance in predicting the pathological subtypes of RCC, while the incorporated uncertainty emphasized the importance of understanding model confidence, which is crucial for assisting clinical decision-making for patients with renal tumors. Clinical relevance statement Our deep learning approach, integrated with uncertainty estimation, offers clinicians a dual advantage: accurate RCC subtype predictions complemented by diagnostic confidence references, promoting informed decision-making for patients with RCC. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2311.00567v2-abstract-full').style.display = 'none'; document.getElementById('2311.00567v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 12 November, 2023; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 1 November, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> November 2023. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">16 pages, 6 figures</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2310.20289">arXiv:2310.20289</a> <span> [<a href="https://arxiv.org/pdf/2310.20289">pdf</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Optics">physics.optics</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Image and Video Processing">eess.IV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Applied Physics">physics.app-ph</span> </div> </div> <p class="title is-5 mathjax"> C-Silicon-based metasurfaces for aperture-robust spectrometer/imaging with angle integration </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&query=Xu%2C+W">Weizhu Xu</a>, <a href="/search/eess?searchtype=author&query=Fan%2C+Q">Qingbin Fan</a>, <a href="/search/eess?searchtype=author&query=Lin%2C+P">Peicheng Lin</a>, <a href="/search/eess?searchtype=author&query=Wang%2C+J">Jiarong Wang</a>, <a href="/search/eess?searchtype=author&query=Hu%2C+H">Hao Hu</a>, <a href="/search/eess?searchtype=author&query=Yue%2C+T">Tao Yue</a>, <a 
href="/search/eess?searchtype=author&query=Hu%2C+X">Xuemei Hu</a>, <a href="/search/eess?searchtype=author&query=Xu%2C+T">Ting Xu</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2310.20289v1-abstract-short" style="display: inline;"> Compared with conventional grating-based spectrometers, reconstructive spectrometers based on spectrally engineered filtering have the advantage of miniaturization because of the less demand for dispersive optics and free propagation space. However, available reconstructive spectrometers fail to balance the performance on operational bandwidth, spectral diversity and angular stability. In this wor… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2310.20289v1-abstract-full').style.display = 'inline'; document.getElementById('2310.20289v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2310.20289v1-abstract-full" style="display: none;"> Compared with conventional grating-based spectrometers, reconstructive spectrometers based on spectrally engineered filtering have the advantage of miniaturization because of the less demand for dispersive optics and free propagation space. However, available reconstructive spectrometers fail to balance the performance on operational bandwidth, spectral diversity and angular stability. In this work, we proposed a compact silicon metasurfaces based spectrometer/camera. After angle integration, the spectral response of the system is robust to angle/aperture within a wide working bandwidth from 400nm to 800nm. 
It is experimentally demonstrated that the proposed method could maintain the spectral consistency from F/1.8 to F/4 (The corresponding angle of incident light ranges from 7° to 16°) and the incident hyperspectral signal could be accurately reconstructed with a fidelity exceeding 99%. Additionally, a spectral imaging system with 400x400 pixels is also established in this work. The accurate reconstructed hyperspectral image indicates that the proposed aperture-robust spectrometer has the potential to be extended as a high-resolution broadband hyperspectral camera. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2310.20289v1-abstract-full').style.display = 'none'; document.getElementById('2310.20289v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 31 October, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2023. 
</p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2310.06678">arXiv:2310.06678</a> <span> [<a href="https://arxiv.org/pdf/2310.06678">pdf</a>, <a href="https://arxiv.org/format/2310.06678">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Information Theory">cs.IT</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Signal Processing">eess.SP</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Systems and Control">eess.SY</span> </div> </div> <p class="title is-5 mathjax"> Modelling and Performance Analysis of the Over-the-Air Computing in Cellular IoT Networks </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&query=Dong%2C+Y">Ying Dong</a>, <a href="/search/eess?searchtype=author&query=Hu%2C+H">Haonan Hu</a>, <a href="/search/eess?searchtype=author&query=Liu%2C+Q">Qiaoshou Liu</a>, <a href="/search/eess?searchtype=author&query=Lv%2C+T">Tingwei Lv</a>, <a href="/search/eess?searchtype=author&query=Chen%2C+Q">Qianbin Chen</a>, <a href="/search/eess?searchtype=author&query=Zhang%2C+J">Jie Zhang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2310.06678v1-abstract-short" style="display: inline;"> Ultra-fast wireless data aggregation (WDA) of distributed data has emerged as a critical design challenge in the ultra-densely deployed cellular internet of things network (CITN) due to limited spectral resources. Over-the-air computing (AirComp) has been proposed as an effective solution for ultra-fast WDA by exploiting the superposition property of wireless channels. 
However, the effect of acces… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2310.06678v1-abstract-full').style.display = 'inline'; document.getElementById('2310.06678v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2310.06678v1-abstract-full" style="display: none;"> Ultra-fast wireless data aggregation (WDA) of distributed data has emerged as a critical design challenge in the ultra-densely deployed cellular internet of things network (CITN) due to limited spectral resources. Over-the-air computing (AirComp) has been proposed as an effective solution for ultra-fast WDA by exploiting the superposition property of wireless channels. However, the effect of access radius of access point (AP) on the AirComp performance has not been investigated yet. Therefore, in this work, the mean square error (MSE) performance of AirComp in the ultra-densely deployed CITN is analyzed with the AP access radius. By modelling the spatial locations of internet of things devices as a Poisson point process, the expression of MSE is derived in an analytical form, which is validated by Monte Carlo simulations. Based on the analytical MSE, we investigate the effect of AP access radius on the MSE of AirComp numerically. The results show that there exists an optimal AP access radius for AirComp, which can decrease the MSE by up to 12.7%. It indicates that the AP access radius should be carefully chosen to improve the AirComp performance in the ultra-densely deployed CITN. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2310.06678v1-abstract-full').style.display = 'none'; document.getElementById('2310.06678v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 11 August, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2023. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2309.16077">arXiv:2309.16077</a> <span> [<a href="https://arxiv.org/pdf/2309.16077">pdf</a>, <a href="https://arxiv.org/format/2309.16077">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Robotics">cs.RO</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Systems and Control">eess.SY</span> </div> </div> <p class="title is-5 mathjax"> Task-Oriented Koopman-Based Control with Contrastive Encoder </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&query=Lyu%2C+X">Xubo Lyu</a>, <a href="/search/eess?searchtype=author&query=Hu%2C+H">Hanyang Hu</a>, <a href="/search/eess?searchtype=author&query=Siriya%2C+S">Seth Siriya</a>, <a href="/search/eess?searchtype=author&query=Pu%2C+Y">Ye Pu</a>, <a href="/search/eess?searchtype=author&query=Chen%2C+M">Mo Chen</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2309.16077v2-abstract-short" style="display: inline;"> We present task-oriented Koopman-based control that utilizes end-to-end reinforcement learning and contrastive encoder to simultaneously learn the Koopman 
latent embedding, operator, and associated linear controller within an iterative loop. By prioritizing the task cost as the main objective for controller learning, we reduce the reliance of controller design on a well-identified model, which, fo… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2309.16077v2-abstract-full').style.display = 'inline'; document.getElementById('2309.16077v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2309.16077v2-abstract-full" style="display: none;"> We present task-oriented Koopman-based control that utilizes end-to-end reinforcement learning and contrastive encoder to simultaneously learn the Koopman latent embedding, operator, and associated linear controller within an iterative loop. By prioritizing the task cost as the main objective for controller learning, we reduce the reliance of controller design on a well-identified model, which, for the first time to the best of our knowledge, extends Koopman control from low to high-dimensional, complex nonlinear systems, including pixel-based tasks and a real robot with lidar observations. Code and videos are available \href{https://sites.google.com/view/kpmlilatsupp/}{here}. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2309.16077v2-abstract-full').style.display = 'none'; document.getElementById('2309.16077v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 1 November, 2023; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 27 September, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> September 2023. 
</p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Accepted by the 7th Annual Conference on Robot Learning (CoRL), 2023 (oral spotlight)</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2309.13155">arXiv:2309.13155</a> <span> [<a href="https://arxiv.org/pdf/2309.13155">pdf</a>, <a href="https://arxiv.org/format/2309.13155">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Systems and Control">eess.SY</span> </div> </div> <p class="title is-5 mathjax"> Multi-Agent Reach-Avoid Games: Two Attackers Versus One Defender and Mixed Integer Programming </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&query=Hu%2C+H">Hanyang Hu</a>, <a href="/search/eess?searchtype=author&query=Bui%2C+M">Minh Bui</a>, <a href="/search/eess?searchtype=author&query=Chen%2C+M">Mo Chen</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2309.13155v1-abstract-short" style="display: inline;"> We propose a hybrid approach that combines Hamilton-Jacobi (HJ) reachability and mixed-integer optimization for solving a reach-avoid game with multiple attackers and defenders. 
The reach-avoid game is an important problem with potential applications in air traffic control and multi-agent motion planning; however, solving this game for many attackers and defenders is intractable due to the adversa… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2309.13155v1-abstract-full').style.display = 'inline'; document.getElementById('2309.13155v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2309.13155v1-abstract-full" style="display: none;"> We propose a hybrid approach that combines Hamilton-Jacobi (HJ) reachability and mixed-integer optimization for solving a reach-avoid game with multiple attackers and defenders. The reach-avoid game is an important problem with potential applications in air traffic control and multi-agent motion planning; however, solving this game for many attackers and defenders is intractable due to the adversarial nature of the agents and the high problem dimensionality. In this paper, we first propose an HJ reachability-based method for solving the reach-avoid game in which 2 attackers are playing against 1 defender; we derive the numerically convergent optimal winning sets for the two sides in environments with obstacles. Utilizing this result and previous results for the 1 vs. 1 game, we further propose solving the general multi-agent reach-avoid game by determining the defender assignments that can maximize the number of attackers captured via a Mixed Integer Program (MIP). Our method generalizes previous state-of-the-art results and is especially useful when there are fewer defenders than attackers. We validate our theoretical results in numerical simulations. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2309.13155v1-abstract-full').style.display = 'none'; document.getElementById('2309.13155v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 22 September, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> September 2023. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2309.05837">arXiv:2309.05837</a> <span> [<a href="https://arxiv.org/pdf/2309.05837">pdf</a>, <a href="https://arxiv.org/format/2309.05837">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Systems and Control">eess.SY</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Robotics">cs.RO</span> </div> </div> <p class="title is-5 mathjax"> The Safety Filter: A Unified View of Safety-Critical Control in Autonomous Systems </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&query=Hsu%2C+K">Kai-Chieh Hsu</a>, <a href="/search/eess?searchtype=author&query=Hu%2C+H">Haimin Hu</a>, <a href="/search/eess?searchtype=author&query=Fisac%2C+J+F">Jaime Fernández Fisac</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2309.05837v1-abstract-short" style="display: inline;"> Recent years have seen significant progress in the realm of robot autonomy, accompanied by the expanding reach of robotic technologies. 
However, the emergence of new deployment domains brings unprecedented challenges in ensuring safe operation of these systems, which remains as crucial as ever. While traditional model-based safe control methods struggle with generalizability and scalability, emerg… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2309.05837v1-abstract-full').style.display = 'inline'; document.getElementById('2309.05837v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2309.05837v1-abstract-full" style="display: none;"> Recent years have seen significant progress in the realm of robot autonomy, accompanied by the expanding reach of robotic technologies. However, the emergence of new deployment domains brings unprecedented challenges in ensuring safe operation of these systems, which remains as crucial as ever. While traditional model-based safe control methods struggle with generalizability and scalability, emerging data-driven approaches tend to lack well-understood guarantees, which can result in unpredictable catastrophic failures. Successful deployment of the next generation of autonomous robots will require integrating the strengths of both paradigms. This article provides a review of safety filter approaches, highlighting important connections between existing techniques and proposing a unified technical framework to understand, compare, and combine them. The new unified view exposes a shared modular structure across a range of seemingly disparate safety filter classes and naturally suggests directions for future progress towards more scalable synthesis, robust monitoring, and efficient intervention. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2309.05837v1-abstract-full').style.display = 'none'; document.getElementById('2309.05837v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 11 September, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> September 2023. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Accepted for publication in Annual Review of Control, Robotics, and Autonomous Systems</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2309.04335">arXiv:2309.04335</a> <span> [<a href="https://arxiv.org/pdf/2309.04335">pdf</a>, <a href="https://arxiv.org/ps/2309.04335">ps</a>, <a href="https://arxiv.org/format/2309.04335">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Information Theory">cs.IT</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Signal Processing">eess.SP</span> </div> </div> <p class="title is-5 mathjax"> On the performance of an integrated communication and localization system: an analytical framework </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&query=Gao%2C+Y">Yuan Gao</a>, <a href="/search/eess?searchtype=author&query=Hu%2C+H">Haonan Hu</a>, <a href="/search/eess?searchtype=author&query=Zhang%2C+J">Jiliang Zhang</a>, <a href="/search/eess?searchtype=author&query=Jin%2C+Y">Yanliang Jin</a>, <a href="/search/eess?searchtype=author&query=Xu%2C+S">Shugong Xu</a>, <a href="/search/eess?searchtype=author&query=Chu%2C+X">Xiaoli Chu</a> </p> <p class="abstract mathjax"> <span 
class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2309.04335v1-abstract-short" style="display: inline;"> Quantifying the performance bound of an integrated localization and communication (ILAC) system and the trade-off between communication and localization performance is critical. In this letter, we consider an ILAC system that can perform communication and localization via time-domain or frequency-domain resource allocation. We develop an analytical framework to derive the closed-form expression of… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2309.04335v1-abstract-full').style.display = 'inline'; document.getElementById('2309.04335v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2309.04335v1-abstract-full" style="display: none;"> Quantifying the performance bound of an integrated localization and communication (ILAC) system and the trade-off between communication and localization performance is critical. In this letter, we consider an ILAC system that can perform communication and localization via time-domain or frequency-domain resource allocation. We develop an analytical framework to derive the closed-form expression of the capacity loss versus localization Cramer-Rao lower bound (CRB) loss via time-domain and frequency-domain resource allocation. Simulation results validate the analytical model and demonstrate that frequency-domain resource allocation is preferable in scenarios with a smaller number of antennas at the next generation nodeB (gNB) and a larger distance between user equipment (UE) and gNB, while time-domain resource allocation is preferable in scenarios with a larger number of antennas and smaller distance between UE and the gNB. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2309.04335v1-abstract-full').style.display = 'none'; document.getElementById('2309.04335v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 8 September, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> September 2023. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">5 pages, 3 figures</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2309.03900">arXiv:2309.03900</a> <span> [<a href="https://arxiv.org/pdf/2309.03900">pdf</a>, <a href="https://arxiv.org/format/2309.03900">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Image and Video Processing">eess.IV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> Learning Continuous Exposure Value Representations for Single-Image HDR Reconstruction </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&query=Chen%2C+S">Su-Kai Chen</a>, <a href="/search/eess?searchtype=author&query=Yen%2C+H">Hung-Lin Yen</a>, <a href="/search/eess?searchtype=author&query=Liu%2C+Y">Yu-Lun Liu</a>, <a href="/search/eess?searchtype=author&query=Chen%2C+M">Min-Hung Chen</a>, <a href="/search/eess?searchtype=author&query=Hu%2C+H">Hou-Ning Hu</a>, <a href="/search/eess?searchtype=author&query=Peng%2C+W">Wen-Hsiao Peng</a>, <a href="/search/eess?searchtype=author&query=Lin%2C+Y">Yen-Yu Lin</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis 
has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2309.03900v1-abstract-short" style="display: inline;"> Deep learning is commonly used to reconstruct HDR images from LDR images. LDR stack-based methods are used for single-image HDR reconstruction, generating an HDR image from a deep learning-generated LDR stack. However, current methods generate the stack with predetermined exposure values (EVs), which may limit the quality of HDR reconstruction. To address this, we propose the continuous exposure v… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2309.03900v1-abstract-full').style.display = 'inline'; document.getElementById('2309.03900v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2309.03900v1-abstract-full" style="display: none;"> Deep learning is commonly used to reconstruct HDR images from LDR images. LDR stack-based methods are used for single-image HDR reconstruction, generating an HDR image from a deep learning-generated LDR stack. However, current methods generate the stack with predetermined exposure values (EVs), which may limit the quality of HDR reconstruction. To address this, we propose the continuous exposure value representation (CEVR), which uses an implicit function to generate LDR images with arbitrary EVs, including those unseen during training. Our approach generates a continuous stack with more images containing diverse EVs, significantly improving HDR reconstruction. We use a cycle training strategy to supervise the model in generating continuous EV LDR images without corresponding ground truths. Our CEVR model outperforms existing methods, as demonstrated by experimental results. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2309.03900v1-abstract-full').style.display = 'none'; document.getElementById('2309.03900v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 7 September, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> September 2023. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">ICCV 2023. Project page: https://skchen1993.github.io/CEVR_web/</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2309.01267">arXiv:2309.01267</a> <span> [<a href="https://arxiv.org/pdf/2309.01267">pdf</a>, <a href="https://arxiv.org/format/2309.01267">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Robotics">cs.RO</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Systems and Control">eess.SY</span> </div> </div> <p class="title is-5 mathjax"> Deception Game: Closing the Safety-Learning Loop in Interactive Robot Autonomy </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&query=Hu%2C+H">Haimin Hu</a>, <a href="/search/eess?searchtype=author&query=Zhang%2C+Z">Zixu Zhang</a>, <a href="/search/eess?searchtype=author&query=Nakamura%2C+K">Kensuke Nakamura</a>, <a href="/search/eess?searchtype=author&query=Bajcsy%2C+A">Andrea Bajcsy</a>, <a href="/search/eess?searchtype=author&query=Fisac%2C+J+F">Jaime F. 
Fisac</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2309.01267v2-abstract-short" style="display: inline;"> An outstanding challenge for the widespread deployment of robotic systems like autonomous vehicles is ensuring safe interaction with humans without sacrificing performance. Existing safety methods often neglect the robot's ability to learn and adapt at runtime, leading to overly conservative behavior. This paper proposes a new closed-loop paradigm for synthesizing safe control policies that explic… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2309.01267v2-abstract-full').style.display = 'inline'; document.getElementById('2309.01267v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2309.01267v2-abstract-full" style="display: none;"> An outstanding challenge for the widespread deployment of robotic systems like autonomous vehicles is ensuring safe interaction with humans without sacrificing performance. Existing safety methods often neglect the robot's ability to learn and adapt at runtime, leading to overly conservative behavior. This paper proposes a new closed-loop paradigm for synthesizing safe control policies that explicitly account for the robot's evolving uncertainty and its ability to quickly respond to future scenarios as they arise, by jointly considering the physical dynamics and the robot's learning algorithm. We leverage adversarial reinforcement learning for tractable safety analysis under high-dimensional learning dynamics and demonstrate our framework's ability to work with both Bayesian belief propagation and implicit learning through large pre-trained neural trajectory predictors. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2309.01267v2-abstract-full').style.display = 'none'; document.getElementById('2309.01267v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 1 November, 2023; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 3 September, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> September 2023. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Conference on Robot Learning 2023</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2309.00514">arXiv:2309.00514</a> <span> [<a href="https://arxiv.org/pdf/2309.00514">pdf</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Image and Video Processing">eess.IV</span> </div> </div> <p class="title is-5 mathjax"> A Machine Vision Method for Correction of Eccentric Error: Based on Adaptive Enhancement Algorithm </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&query=Wang%2C+F">Fanyi Wang</a>, <a href="/search/eess?searchtype=author&query=Cao%2C+P">Pin Cao</a>, <a href="/search/eess?searchtype=author&query=Zhang%2C+Y">Yihui Zhang</a>, <a href="/search/eess?searchtype=author&query=Hu%2C+H">Haotian Hu</a>, <a href="/search/eess?searchtype=author&query=Yang%2C+Y">Yongying Yang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark 
mathjax" id="2309.00514v1-abstract-short" style="display: inline;"> In the procedure of surface defects detection for large-aperture aspherical optical elements, it is of vital significance to adjust the optical axis of the element to be coaxial with the mechanical spin axis accurately. Therefore, a machine vision method for eccentric error correction is proposed in this paper. Focusing on the severe defocus blur of reference crosshair image caused by the imaging… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2309.00514v1-abstract-full').style.display = 'inline'; document.getElementById('2309.00514v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2309.00514v1-abstract-full" style="display: none;"> In the procedure of surface defects detection for large-aperture aspherical optical elements, it is of vital significance to adjust the optical axis of the element to be coaxial with the mechanical spin axis accurately. Therefore, a machine vision method for eccentric error correction is proposed in this paper. Focusing on the severe defocus blur of reference crosshair image caused by the imaging characteristic of the aspherical optical element, which may lead to the failure of correction, an Adaptive Enhancement Algorithm (AEA) is proposed to strengthen the crosshair image. AEA is consisted of existed Guided Filter Dark Channel Dehazing Algorithm (GFA) and proposed lightweight Multi-scale Densely Connected Network (MDC-Net). The enhancement effect of GFA is excellent but time-consuming, and the enhancement effect of MDC-Net is slightly inferior but strongly real-time. As AEA will be executed dozens of times during each correction procedure, its real-time performance is very important. 
Therefore, by setting the empirical threshold of definition evaluation function SMD2, GFA and MDC-Net are respectively applied to highly and slightly blurred crosshair images so as to ensure the enhancement effect while saving as much time as possible. AEA has certain robustness in time-consuming performance, which takes an average time of 0.2721s and 0.0963s to execute GFA and MDC-Net separately on ten 200 pixels × 200 pixels Region of Interest (ROI) images with different degrees of blur. And the eccentricity error can be reduced to within 10um by our method. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2309.00514v1-abstract-full').style.display = 'none'; document.getElementById('2309.00514v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 1 September, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> September 2023. 
</p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2307.01534">arXiv:2307.01534</a> <span> [<a href="https://arxiv.org/pdf/2307.01534">pdf</a>, <a href="https://arxiv.org/format/2307.01534">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Signal Processing">eess.SP</span> </div> </div> <p class="title is-5 mathjax"> Impact of UAVs Equipped with ADS-B on the Civil Aviation Monitoring System </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&query=Liao%2C+Y">Yiyang Liao</a>, <a href="/search/eess?searchtype=author&query=Zhang%2C+L">Lei Zhang</a>, <a href="/search/eess?searchtype=author&query=Jia%2C+Z">Ziye Jia</a>, <a href="/search/eess?searchtype=author&query=Dong%2C+C">Chao Dong</a>, <a href="/search/eess?searchtype=author&query=Zhang%2C+Y">Yifan Zhang</a>, <a href="/search/eess?searchtype=author&query=Wu%2C+Q">Qihui Wu</a>, <a href="/search/eess?searchtype=author&query=Hu%2C+H">Huiling Hu</a>, <a href="/search/eess?searchtype=author&query=Wang%2C+B">Bin Wang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2307.01534v1-abstract-short" style="display: inline;"> In recent years, there is an increasing demand for unmanned aerial vehicles (UAVs) to complete multiple applications. However, as unmanned equipments, UAVs lead to some security risks to general civil aviations. In order to strengthen the flight management of UAVs and guarantee the safety, UAVs can be equipped with automatic dependent surveillance-broadcast (ADS-B) devices. 
In addition, as an auto… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2307.01534v1-abstract-full').style.display = 'inline'; document.getElementById('2307.01534v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2307.01534v1-abstract-full" style="display: none;"> In recent years, there is an increasing demand for unmanned aerial vehicles (UAVs) to complete multiple applications. However, as unmanned equipments, UAVs lead to some security risks to general civil aviations. In order to strengthen the flight management of UAVs and guarantee the safety, UAVs can be equipped with automatic dependent surveillance-broadcast (ADS-B) devices. In addition, as an automatic system, ADS-B can periodically broadcast flight information to the nearby aircrafts or the ground stations, and the technology is already used in civil aviation systems. However, due to the limited frequency of ADS-B technique, UAVs equipped with ADS-B devices result in the loss of packets to both UAVs and civil aviation. Further, the operation of civil aviation are seriously interfered. Hence, this paper firstly examines the packets loss of civil planes at different distance, then analyzes the impact of UAVs equipped with ADS-B on the packets updating of civil planes. The result indicates that the 1090MHz band blocking is affected by the density of UAVs. Besides, the frequency capacity is affected by the requirement of updating interval of civil planes. The position updating probability within 3s is 92.3% if there are 200 planes within 50km and 20 UAVs within 5km. The position updating probability within 3s is 86.9% if there are 200 planes within 50km and 40 UAVs within 5km. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2307.01534v1-abstract-full').style.display = 'none'; document.getElementById('2307.01534v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 4 July, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> July 2023. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2306.16696">arXiv:2306.16696</a> <span> [<a href="https://arxiv.org/pdf/2306.16696">pdf</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Audio and Speech Processing">eess.AS</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Sound">cs.SD</span> </div> <div class="is-inline-block" style="margin-left: 0.5rem"> <div class="tags has-addons"> <span class="tag is-dark is-size-7">doi</span> <span class="tag is-light is-size-7"><a class="" href="https://doi.org/10.1051/aacus/2024062">10.1051/aacus/2024062 <i class="fa fa-external-link" aria-hidden="true"></i></a></span> </div> </div> </div> <p class="title is-5 mathjax"> Computationally-efficient and perceptually-motivated rendering of diffuse reflections in room acoustics simulation </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&query=Ewert%2C+S+D">Stephan D. 
Ewert</a>, <a href="/search/eess?searchtype=author&query=G%C3%B6%C3%9Fling%2C+N">Nico Gößling</a>, <a href="/search/eess?searchtype=author&query=Buttler%2C+O">Oliver Buttler</a>, <a href="/search/eess?searchtype=author&query=van+de+Par%2C+S">Steven van de Par</a>, <a href="/search/eess?searchtype=author&query=Hu%2C+H">Hongmei Hu</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2306.16696v1-abstract-short" style="display: inline;"> Geometrical acoustics is well suited for simulating room reverberation in interactive real-time applications. While the image source model (ISM) is exceptionally fast, the restriction to specular reflections impacts its perceptual plausibility. To account for diffuse late reverberation, hybrid approaches have been proposed, e.g., using a feedback delay network (FDN) in combination with the ISM. He… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2306.16696v1-abstract-full').style.display = 'inline'; document.getElementById('2306.16696v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2306.16696v1-abstract-full" style="display: none;"> Geometrical acoustics is well suited for simulating room reverberation in interactive real-time applications. While the image source model (ISM) is exceptionally fast, the restriction to specular reflections impacts its perceptual plausibility. To account for diffuse late reverberation, hybrid approaches have been proposed, e.g., using a feedback delay network (FDN) in combination with the ISM. Here, a computationally-efficient, digital-filter approach is suggested to account for effects of non-specular reflections in the ISM and to couple scattered sound into a diffuse reverberation model using a spatially rendered FDN. 
Depending on the scattering coefficient of a room boundary, energy of each image source is split into a specular and a scattered part which is added to the diffuse sound field. Temporal effects as observed for an infinite ideal diffuse (Lambertian) reflector are simulated using cascaded all-pass filters. Effects of scattering and multiple (inter-) reflections caused by larger geometric disturbances at walls and by objects in the room are accounted for in a highly simplified manner. Using a single parameter to quantify deviations from an empty shoebox room, each reflection is temporally smeared using cascaded all-pass filters. The proposed method was perceptually evaluated against dummy head recordings of real rooms. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2306.16696v1-abstract-full').style.display = 'none'; document.getElementById('2306.16696v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 29 June, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> June 2023. 
</p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">This work has been submitted to Forum Acusticum 2023 for publication</span> </p> </li> </ol> <nav class="pagination is-small is-centered breathe-horizontal" role="navigation" aria-label="pagination"> <a href="" class="pagination-previous is-invisible">Previous </a> <a href="/search/?searchtype=author&query=Hu%2C+H&start=50" class="pagination-next" >Next </a> <ul class="pagination-list"> <li> <a href="/search/?searchtype=author&query=Hu%2C+H&start=0" class="pagination-link is-current" aria-label="Goto page 1">1 </a> </li> <li> <a href="/search/?searchtype=author&query=Hu%2C+H&start=50" class="pagination-link " aria-label="Page 2" aria-current="page">2 </a> </li> <li> <a href="/search/?searchtype=author&query=Hu%2C+H&start=100" class="pagination-link " aria-label="Page 3" aria-current="page">3 </a> </li> </ul> </nav> <div class="is-hidden-tablet"> <!-- feedback for mobile only --> <span class="help" style="display: inline-block;"><a href="https://github.com/arXiv/arxiv-search/releases">Search v0.5.6 released 2020-02-24</a> </span> </div> </div> </main> <footer> <div class="columns is-desktop" role="navigation" aria-label="Secondary"> <!-- MetaColumn 1 --> <div class="column"> <div class="columns"> <div class="column"> <ul class="nav-spaced"> <li><a href="https://info.arxiv.org/about">About</a></li> <li><a href="https://info.arxiv.org/help">Help</a></li> </ul> </div> <div class="column"> <ul class="nav-spaced"> <li> <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 512 512" class="icon filter-black" role="presentation"><title>contact arXiv</title><desc>Click here to contact arXiv</desc><path d="M502.3 190.8c3.9-3.1 9.7-.2 9.7 4.7V400c0 26.5-21.5 48-48 48H48c-26.5 0-48-21.5-48-48V195.6c0-5 5.7-7.8 9.7-4.7 22.4 17.4 52.1 39.5 154.1 113.6 21.1 15.4 56.7 47.8 92.2 47.6 35.7.3 72-32.8 92.3-47.6 102-74.1 131.6-96.3 
154-113.7zM256 320c23.2.4 56.6-29.2 73.4-41.4 132.7-96.3 142.8-104.7 173.4-128.7 5.8-4.5 9.2-11.5 9.2-18.9v-19c0-26.5-21.5-48-48-48H48C21.5 64 0 85.5 0 112v19c0 7.4 3.4 14.3 9.2 18.9 30.6 23.9 40.7 32.4 173.4 128.7 16.8 12.2 50.2 41.8 73.4 41.4z"/></svg> <a href="https://info.arxiv.org/help/contact.html"> Contact</a> </li> <li> <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 512 512" class="icon filter-black" role="presentation"><title>subscribe to arXiv mailings</title><desc>Click here to subscribe</desc><path d="M476 3.2L12.5 270.6c-18.1 10.4-15.8 35.6 2.2 43.2L121 358.4l287.3-253.2c5.5-4.9 13.3 2.6 8.6 8.3L176 407v80.5c0 23.6 28.5 32.9 42.5 15.8L282 426l124.6 52.2c14.2 6 30.4-2.9 33-18.2l72-432C515 7.8 493.3-6.8 476 3.2z"/></svg> <a href="https://info.arxiv.org/help/subscribe"> Subscribe</a> </li> </ul> </div> </div> </div> <!-- end MetaColumn 1 --> <!-- MetaColumn 2 --> <div class="column"> <div class="columns"> <div class="column"> <ul class="nav-spaced"> <li><a href="https://info.arxiv.org/help/license/index.html">Copyright</a></li> <li><a href="https://info.arxiv.org/help/policies/privacy_policy.html">Privacy Policy</a></li> </ul> </div> <div class="column sorry-app-links"> <ul class="nav-spaced"> <li><a href="https://info.arxiv.org/help/web_accessibility.html">Web Accessibility Assistance</a></li> <li> <p class="help"> <a class="a11y-main-link" href="https://status.arxiv.org" target="_blank">arXiv Operational Status <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 256 512" class="icon filter-dark_grey" role="presentation"><path d="M224.3 273l-136 136c-9.4 9.4-24.6 9.4-33.9 0l-22.6-22.6c-9.4-9.4-9.4-24.6 0-33.9l96.4-96.4-96.4-96.4c-9.4-9.4-9.4-24.6 0-33.9L54.3 103c9.4-9.4 24.6-9.4 33.9 0l136 136c9.5 9.4 9.5 24.6.1 34z"/></svg></a><br> Get status notifications via <a class="is-link" href="https://subscribe.sorryapp.com/24846f03/email/new" target="_blank"><svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 512 512" class="icon filter-black" 
role="presentation"><path d="M502.3 190.8c3.9-3.1 9.7-.2 9.7 4.7V400c0 26.5-21.5 48-48 48H48c-26.5 0-48-21.5-48-48V195.6c0-5 5.7-7.8 9.7-4.7 22.4 17.4 52.1 39.5 154.1 113.6 21.1 15.4 56.7 47.8 92.2 47.6 35.7.3 72-32.8 92.3-47.6 102-74.1 131.6-96.3 154-113.7zM256 320c23.2.4 56.6-29.2 73.4-41.4 132.7-96.3 142.8-104.7 173.4-128.7 5.8-4.5 9.2-11.5 9.2-18.9v-19c0-26.5-21.5-48-48-48H48C21.5 64 0 85.5 0 112v19c0 7.4 3.4 14.3 9.2 18.9 30.6 23.9 40.7 32.4 173.4 128.7 16.8 12.2 50.2 41.8 73.4 41.4z"/></svg>email</a> or <a class="is-link" href="https://subscribe.sorryapp.com/24846f03/slack/new" target="_blank"><svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 448 512" class="icon filter-black" role="presentation"><path d="M94.12 315.1c0 25.9-21.16 47.06-47.06 47.06S0 341 0 315.1c0-25.9 21.16-47.06 47.06-47.06h47.06v47.06zm23.72 0c0-25.9 21.16-47.06 47.06-47.06s47.06 21.16 47.06 47.06v117.84c0 25.9-21.16 47.06-47.06 47.06s-47.06-21.16-47.06-47.06V315.1zm47.06-188.98c-25.9 0-47.06-21.16-47.06-47.06S139 32 164.9 32s47.06 21.16 47.06 47.06v47.06H164.9zm0 23.72c25.9 0 47.06 21.16 47.06 47.06s-21.16 47.06-47.06 47.06H47.06C21.16 243.96 0 222.8 0 196.9s21.16-47.06 47.06-47.06H164.9zm188.98 47.06c0-25.9 21.16-47.06 47.06-47.06 25.9 0 47.06 21.16 47.06 47.06s-21.16 47.06-47.06 47.06h-47.06V196.9zm-23.72 0c0 25.9-21.16 47.06-47.06 47.06-25.9 0-47.06-21.16-47.06-47.06V79.06c0-25.9 21.16-47.06 47.06-47.06 25.9 0 47.06 21.16 47.06 47.06V196.9zM283.1 385.88c25.9 0 47.06 21.16 47.06 47.06 0 25.9-21.16 47.06-47.06 47.06-25.9 0-47.06-21.16-47.06-47.06v-47.06h47.06zm0-23.72c-25.9 0-47.06-21.16-47.06-47.06 0-25.9 21.16-47.06 47.06-47.06h117.84c25.9 0 47.06 21.16 47.06 47.06 0 25.9-21.16 47.06-47.06 47.06H283.1z"/></svg>slack</a> </p> </li> </ul> </div> </div> </div> <!-- end MetaColumn 2 --> </div> </footer> <script src="https://static.arxiv.org/static/base/1.0.0a5/js/member_acknowledgement.js"></script> </body> </html>