<!-- Removed scrape artifacts ("CINXE.COM" and a duplicated page title) that preceded the doctype: only comments and whitespace are allowed before <!DOCTYPE html>; any other content forces quirks mode. -->
<!DOCTYPE html> <html lang="en"> <head> <meta charset="utf-8"/> <meta name="viewport" content="width=device-width, initial-scale=1"/> <!-- new favicon config and versions by realfavicongenerator.net --> <link rel="apple-touch-icon" sizes="180x180" href="https://static.arxiv.org/static/base/1.0.0a5/images/icons/apple-touch-icon.png"> <link rel="icon" type="image/png" sizes="32x32" href="https://static.arxiv.org/static/base/1.0.0a5/images/icons/favicon-32x32.png"> <link rel="icon" type="image/png" sizes="16x16" href="https://static.arxiv.org/static/base/1.0.0a5/images/icons/favicon-16x16.png"> <link rel="manifest" href="https://static.arxiv.org/static/base/1.0.0a5/images/icons/site.webmanifest"> <link rel="mask-icon" href="https://static.arxiv.org/static/base/1.0.0a5/images/icons/safari-pinned-tab.svg" color="#b31b1b"> <link rel="shortcut icon" href="https://static.arxiv.org/static/base/1.0.0a5/images/icons/favicon.ico"> <meta name="msapplication-TileColor" content="#b31b1b"> <meta name="msapplication-config" content="images/icons/browserconfig.xml"> <meta name="theme-color" content="#b31b1b"> <!-- end favicon config --> <title>Search | arXiv e-print repository</title> <script defer src="https://static.arxiv.org/static/base/1.0.0a5/fontawesome-free-5.11.2-web/js/all.js"></script> <link rel="stylesheet" href="https://static.arxiv.org/static/base/1.0.0a5/css/arxivstyle.css" /> <script type="text/x-mathjax-config"> MathJax.Hub.Config({ messageStyle: "none", extensions: ["tex2jax.js"], jax: ["input/TeX", "output/HTML-CSS"], tex2jax: { inlineMath: [ ['$','$'], ["\\(","\\)"] ], displayMath: [ ['$$','$$'], ["\\[","\\]"] ], processEscapes: true, ignoreClass: '.*', processClass: 'mathjax.*' }, TeX: { extensions: ["AMSmath.js", "AMSsymbols.js", "noErrors.js"], noErrors: { inlineDelimiters: ["$","$"], multiLine: false, style: { "font-size": "normal", "border": "" } } }, "HTML-CSS": { availableFonts: ["TeX"] } }); </script> <script 
src='https://static.arxiv.org/MathJax-2.7.3/MathJax.js'></script> <script src="https://static.arxiv.org/static/base/1.0.0a5/js/notification.js"></script> <link rel="stylesheet" href="https://static.arxiv.org/static/search/0.5.6/css/bulma-tooltip.min.css" /> <link rel="stylesheet" href="https://static.arxiv.org/static/search/0.5.6/css/search.css" /> <script src="https://code.jquery.com/jquery-3.2.1.slim.min.js" integrity="sha256-k2WSCIexGzOj3Euiig+TlR8gA0EmPjuc79OEeY5L45g=" crossorigin="anonymous"></script> <script src="https://static.arxiv.org/static/search/0.5.6/js/fieldset.js"></script> <style> input#cf-customfield_11400 { display: none; } </style> </head> <body> <header><a href="#main-container" class="is-sr-only">Skip to main content</a> <!-- contains Cornell logo and sponsor statement --> <div class="attribution level is-marginless" role="banner"> <div class="level-left"> <a class="level-item" href="https://cornell.edu/"><img src="https://static.arxiv.org/static/base/1.0.0a5/images/cornell-reduced-white-SMALL.svg" alt="Cornell University" width="200" aria-label="logo" /></a> </div> <div class="level-right is-marginless"><p class="sponsors level-item is-marginless"><span id="support-ack-url">We gratefully acknowledge support from<br /> the Simons Foundation, <a href="https://info.arxiv.org/about/ourmembers.html">member institutions</a>, and all contributors. 
<a href="https://info.arxiv.org/about/donate.html">Donate</a></span></p></div> </div> <!-- contains arXiv identity and search bar --> <div class="identity level is-marginless"> <div class="level-left"> <div class="level-item"> <a class="arxiv" href="https://arxiv.org/" aria-label="arxiv-logo"> <img src="https://static.arxiv.org/static/base/1.0.0a5/images/arxiv-logo-one-color-white.svg" aria-label="logo" alt="arxiv logo" width="85" style="width:85px;"/> </a> </div> </div> <div class="search-block level-right"> <form class="level-item mini-search" method="GET" action="https://arxiv.org/search"> <div class="field has-addons"> <div class="control"> <input class="input is-small" type="text" name="query" placeholder="Search..." aria-label="Search term or terms" /> <p class="help"><a href="https://info.arxiv.org/help">Help</a> | <a href="https://arxiv.org/search/advanced">Advanced Search</a></p> </div> <div class="control"> <div class="select is-small"> <select name="searchtype" aria-label="Field to search"> <option value="all" selected="selected">All fields</option> <option value="title">Title</option> <option value="author">Author</option> <option value="abstract">Abstract</option> <option value="comments">Comments</option> <option value="journal_ref">Journal reference</option> <option value="acm_class">ACM classification</option> <option value="msc_class">MSC classification</option> <option value="report_num">Report number</option> <option value="paper_id">arXiv identifier</option> <option value="doi">DOI</option> <option value="orcid">ORCID</option> <option value="author_id">arXiv author ID</option> <option value="help">Help pages</option> <option value="full_text">Full text</option> </select> </div> </div> <input type="hidden" name="source" value="header"> <button class="button is-small is-cul-darker">Search</button> </div> </form> </div> </div> <!-- closes identity --> <div class="container"> <div class="user-tools is-size-7 has-text-right has-text-weight-bold" 
role="navigation" aria-label="User menu"> <a href="https://arxiv.org/login">Login</a> </div> </div> </header> <main class="container" id="main-container"> <div class="level is-marginless"> <div class="level-left"> <h1 class="title is-clearfix"> Showing 1&ndash;50 of 120 results for author: <span class="mathjax">Shi, Z</span> </h1> </div> <div class="level-right is-hidden-mobile"> <!-- feedback for mobile is moved to footer --> <span class="help" style="display: inline-block;"><a href="https://github.com/arXiv/arxiv-search/releases">Search v0.5.6 released 2020-02-24</a>&nbsp;&nbsp;</span> </div> </div> <div class="content"> <form method="GET" action="/search/eess" role="search"> Searching in archive <strong>eess</strong>. <a href="/search/?searchtype=author&amp;query=Shi%2C+Z">Search in all archives.</a> <div class="field has-addons-tablet"> <div class="control is-expanded"> <label for="query" class="hidden-label">Search term or terms</label> <input class="input is-medium" id="query" name="query" placeholder="Search term..." 
type="text" value="Shi, Z"> </div> <div class="select control is-medium"> <label class="is-hidden" for="searchtype">Field</label> <select class="is-medium" id="searchtype" name="searchtype"><option value="all">All fields</option><option value="title">Title</option><option selected value="author">Author(s)</option><option value="abstract">Abstract</option><option value="comments">Comments</option><option value="journal_ref">Journal reference</option><option value="acm_class">ACM classification</option><option value="msc_class">MSC classification</option><option value="report_num">Report number</option><option value="paper_id">arXiv identifier</option><option value="doi">DOI</option><option value="orcid">ORCID</option><option value="license">License (URI)</option><option value="author_id">arXiv author ID</option><option value="help">Help pages</option><option value="full_text">Full text</option></select> </div> <div class="control"> <button class="button is-link is-medium">Search</button> </div> </div> <div class="field"> <div class="control is-size-7"> <label class="radio"> <input checked id="abstracts-0" name="abstracts" type="radio" value="show"> Show abstracts </label> <label class="radio"> <input id="abstracts-1" name="abstracts" type="radio" value="hide"> Hide abstracts </label> </div> </div> <div class="is-clearfix" style="height: 2.5em"> <div class="is-pulled-right"> <a href="/search/advanced?terms-0-term=Shi%2C+Z&amp;terms-0-field=author&amp;size=50&amp;order=-announced_date_first">Advanced Search</a> </div> </div> <input type="hidden" name="order" value="-announced_date_first"> <input type="hidden" name="size" value="50"> </form> <div class="level breathe-horizontal"> <div class="level-left"> <form method="GET" action="/search/"> <div style="display: none;"> <!-- ids in this hidden mirror form are suffixed "-hidden" to avoid duplicating the ids of the visible search form above --> <select id="searchtype-hidden" name="searchtype"><option value="all">All fields</option><option value="title">Title</option><option selected value="author">Author(s)</option><option 
value="abstract">Abstract</option><option value="comments">Comments</option><option value="journal_ref">Journal reference</option><option value="acm_class">ACM classification</option><option value="msc_class">MSC classification</option><option value="report_num">Report number</option><option value="paper_id">arXiv identifier</option><option value="doi">DOI</option><option value="orcid">ORCID</option><option value="license">License (URI)</option><option value="author_id">arXiv author ID</option><option value="help">Help pages</option><option value="full_text">Full text</option></select> <input id="query-hidden" name="query" type="text" value="Shi, Z"> <ul id="abstracts"><li><input checked id="abstracts-hidden-0" name="abstracts" type="radio" value="show"> <label for="abstracts-hidden-0">Show abstracts</label></li><li><input id="abstracts-hidden-1" name="abstracts" type="radio" value="hide"> <label for="abstracts-hidden-1">Hide abstracts</label></li></ul> </div> <div class="box field is-grouped is-grouped-multiline level-item"> <div class="control"> <span class="select is-small"> <select id="size" name="size"><option value="25">25</option><option selected value="50">50</option><option value="100">100</option><option value="200">200</option></select> </span> <label for="size">results per page</label>. 
</div> <div class="control"> <label for="order">Sort results by</label> <span class="select is-small"> <select id="order" name="order"><option selected value="-announced_date_first">Announcement date (newest first)</option><option value="announced_date_first">Announcement date (oldest first)</option><option value="-submitted_date">Submission date (newest first)</option><option value="submitted_date">Submission date (oldest first)</option><option value="">Relevance</option></select> </span> </div> <div class="control"> <button class="button is-small is-link">Go</button> </div> </div> </form> </div> </div> <nav class="pagination is-small is-centered breathe-horizontal" role="navigation" aria-label="pagination"> <a href="" class="pagination-previous is-invisible">Previous </a> <a href="/search/?searchtype=author&amp;query=Shi%2C+Z&amp;start=50" class="pagination-next" >Next </a> <ul class="pagination-list"> <li> <a href="/search/?searchtype=author&amp;query=Shi%2C+Z&amp;start=0" class="pagination-link is-current" aria-label="Goto page 1" aria-current="page">1 </a> </li> <li> <a href="/search/?searchtype=author&amp;query=Shi%2C+Z&amp;start=50" class="pagination-link " aria-label="Page 2">2 </a> </li> <li> <a href="/search/?searchtype=author&amp;query=Shi%2C+Z&amp;start=100" class="pagination-link " aria-label="Page 3">3 </a> </li> </ul> </nav> <ol class="breathe-horizontal" start="1"> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2502.09654">arXiv:2502.09654</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2502.09654">pdf</a>, <a href="https://arxiv.org/format/2502.09654">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Image and Video Processing">eess.IV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> 
</div> </div> <p class="title is-5 mathjax"> Heterogeneous Mixture of Experts for Remote Sensing Image Super-Resolution </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&amp;query=Chen%2C+B">Bowen Chen</a>, <a href="/search/eess?searchtype=author&amp;query=Chen%2C+K">Keyan Chen</a>, <a href="/search/eess?searchtype=author&amp;query=Yang%2C+M">Mohan Yang</a>, <a href="/search/eess?searchtype=author&amp;query=Zou%2C+Z">Zhengxia Zou</a>, <a href="/search/eess?searchtype=author&amp;query=Shi%2C+Z">Zhenwei Shi</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2502.09654v1-abstract-short" style="display: inline;"> Remote sensing image super-resolution (SR) aims to reconstruct high-resolution remote sensing images from low-resolution inputs, thereby addressing limitations imposed by sensors and imaging conditions. However, the inherent characteristics of remote sensing images, including diverse ground object types and complex details, pose significant challenges to achieving high-quality reconstruction. Exis&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.09654v1-abstract-full').style.display = 'inline'; document.getElementById('2502.09654v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2502.09654v1-abstract-full" style="display: none;"> Remote sensing image super-resolution (SR) aims to reconstruct high-resolution remote sensing images from low-resolution inputs, thereby addressing limitations imposed by sensors and imaging conditions. However, the inherent characteristics of remote sensing images, including diverse ground object types and complex details, pose significant challenges to achieving high-quality reconstruction. 
Existing methods typically employ a uniform structure to process various types of ground objects without distinction, making it difficult to adapt to the complex characteristics of remote sensing images. To address this issue, we introduce a Mixture of Experts (MoE) model and design a set of heterogeneous experts. These experts are organized into multiple expert groups, where experts within each group are homogeneous while being heterogeneous across groups. This design ensures that specialized activation parameters can be employed to handle the diverse and intricate details of ground objects effectively. To better accommodate the heterogeneous experts, we propose a multi-level feature aggregation strategy to guide the routing process. Additionally, we develop a dual-routing mechanism to adaptively select the optimal expert for each pixel. Experiments conducted on the UCMerced and AID datasets demonstrate that our proposed method achieves superior SR reconstruction accuracy compared to state-of-the-art methods. The code will be available at https://github.com/Mr-Bamboo/MFG-HMoE. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.09654v1-abstract-full').style.display = 'none'; document.getElementById('2502.09654v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 11 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2025. 
</p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2501.10705">arXiv:2501.10705</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2501.10705">pdf</a>, <a href="https://arxiv.org/format/2501.10705">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Information Theory">cs.IT</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Signal Processing">eess.SP</span> </div> </div> <p class="title is-5 mathjax"> Secure Communication in Dynamic RDARS-Driven Systems </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&amp;query=Pei%2C+Z">Ziqian Pei</a>, <a href="/search/eess?searchtype=author&amp;query=Wang%2C+J">Jintao Wang</a>, <a href="/search/eess?searchtype=author&amp;query=Zhang%2C+P">Pingping Zhang</a>, <a href="/search/eess?searchtype=author&amp;query=Shi%2C+Z">Zheng Shi</a>, <a href="/search/eess?searchtype=author&amp;query=Yang%2C+G">Guanghua Yang</a>, <a href="/search/eess?searchtype=author&amp;query=Ma%2C+S">Shaodan Ma</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2501.10705v1-abstract-short" style="display: inline;"> In this letter, we investigate a dynamic reconfigurable distributed antenna and reflection surface (RDARS)-driven secure communication system, where the working mode of the RDARS can be flexibly configured. We aim to maximize the secrecy rate by jointly designing the active beamforming vectors, reflection coefficients, and the channel-aware mode selection matrix. 
To address the non-convex binary a&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2501.10705v1-abstract-full').style.display = 'inline'; document.getElementById('2501.10705v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2501.10705v1-abstract-full" style="display: none;"> In this letter, we investigate a dynamic reconfigurable distributed antenna and reflection surface (RDARS)-driven secure communication system, where the working mode of the RDARS can be flexibly configured. We aim to maximize the secrecy rate by jointly designing the active beamforming vectors, reflection coefficients, and the channel-aware mode selection matrix. To address the non-convex binary and cardinality constraints introduced by dynamic mode selection, we propose an efficient alternating optimization (AO) framework that employs penalty-based fractional programming (FP) and successive convex approximation (SCA) transformations. Simulation results demonstrate the potential of RDARS in enhancing the secrecy rate and show its superiority compared to existing reflection surface-based schemes. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2501.10705v1-abstract-full').style.display = 'none'; document.getElementById('2501.10705v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 18 January, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> January 2025. 
</p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">5 pages, 5 figures</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2411.18235">arXiv:2411.18235</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2411.18235">pdf</a>, <a href="https://arxiv.org/format/2411.18235">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Robotics">cs.RO</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Systems and Control">eess.SY</span> </div> </div> <p class="title is-5 mathjax"> Certified Training with Branch-and-Bound: A Case Study on Lyapunov-stable Neural Control </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&amp;query=Shi%2C+Z">Zhouxing Shi</a>, <a href="/search/eess?searchtype=author&amp;query=Hsieh%2C+C">Cho-Jui Hsieh</a>, <a href="/search/eess?searchtype=author&amp;query=Zhang%2C+H">Huan Zhang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2411.18235v1-abstract-short" style="display: inline;"> We study the problem of learning Lyapunov-stable neural controllers which provably satisfy the Lyapunov asymptotic stability condition within a region-of-attraction. 
Compared to previous works which commonly used counterexample guided training on this task, we develop a new and generally formulated certified training framework named CT-BaB, and we optimize for differentiable verified bounds, to pr&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.18235v1-abstract-full').style.display = 'inline'; document.getElementById('2411.18235v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2411.18235v1-abstract-full" style="display: none;"> We study the problem of learning Lyapunov-stable neural controllers which provably satisfy the Lyapunov asymptotic stability condition within a region-of-attraction. Compared to previous works which commonly used counterexample guided training on this task, we develop a new and generally formulated certified training framework named CT-BaB, and we optimize for differentiable verified bounds, to produce verification-friendly models. In order to handle the relatively large region-of-interest, we propose a novel framework of training-time branch-and-bound to dynamically maintain a training dataset of subregions throughout training, such that the hardest subregions are iteratively split into smaller ones whose verified bounds can be computed more tightly to ease the training. We demonstrate that our new training framework can produce models which can be more efficiently verified at test time. On the largest 2D quadrotor dynamical system, verification for our model is more than 5X faster compared to the baseline, while our size of region-of-attraction is 16X larger than the baseline. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.18235v1-abstract-full').style.display = 'none'; document.getElementById('2411.18235v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 27 November, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> November 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Preprint</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2409.13095">arXiv:2409.13095</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2409.13095">pdf</a>, <a href="https://arxiv.org/format/2409.13095">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Sound">cs.SD</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Audio and Speech Processing">eess.AS</span> </div> </div> <p class="title is-5 mathjax"> Personalized Speech Recognition for Children with Test-Time Adaptation </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&amp;query=Shi%2C+Z">Zhonghao Shi</a>, <a href="/search/eess?searchtype=author&amp;query=Srivastava%2C+H">Harshvardhan Srivastava</a>, <a href="/search/eess?searchtype=author&amp;query=Shi%2C+X">Xuan Shi</a>, <a href="/search/eess?searchtype=author&amp;query=Narayanan%2C+S">Shrikanth Narayanan</a>, <a href="/search/eess?searchtype=author&amp;query=Matari%C4%87%2C+M+J">Maja J. 
Matarić</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2409.13095v1-abstract-short" style="display: inline;"> Accurate automatic speech recognition (ASR) for children is crucial for effective real-time child-AI interaction, especially in educational applications. However, off-the-shelf ASR models primarily pre-trained on adult data tend to generalize poorly to children&#39;s speech due to the data domain shift from adults to children. Recent studies have found that supervised fine-tuning on children&#39;s speech&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2409.13095v1-abstract-full').style.display = 'inline'; document.getElementById('2409.13095v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2409.13095v1-abstract-full" style="display: none;"> Accurate automatic speech recognition (ASR) for children is crucial for effective real-time child-AI interaction, especially in educational applications. However, off-the-shelf ASR models primarily pre-trained on adult data tend to generalize poorly to children&#39;s speech due to the data domain shift from adults to children. Recent studies have found that supervised fine-tuning on children&#39;s speech data can help bridge this domain shift, but human annotations may be impractical to obtain for real-world applications and adaptation at training time can overlook additional domain shifts occurring at test time. We devised a novel ASR pipeline to apply unsupervised test-time adaptation (TTA) methods for child speech recognition, so that ASR models pre-trained on adult speech can be continuously adapted to each child speaker at test time without further human annotations. 
Our results show that ASR models adapted with TTA methods significantly outperform the unadapted off-the-shelf ASR baselines both on average and statistically across individual child speakers. Our analysis also discovered significant data domain shifts both between child speakers and within each child speaker, which further motivates the need for test-time adaptation. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2409.13095v1-abstract-full').style.display = 'none'; document.getElementById('2409.13095v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 19 September, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> September 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">This work has been submitted to the IEEE for possible publication</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2409.06577">arXiv:2409.06577</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2409.06577">pdf</a>, <a href="https://arxiv.org/format/2409.06577">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Signal Processing">eess.SP</span> </div> </div> <p class="title is-5 mathjax"> Compressed Sensing based Detection Schemes for Differential Spatial Modulation in Visible Light Communication Systems </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&amp;query=Shi%2C+Z">Zichun Shi</a>, <a href="/search/eess?searchtype=author&amp;query=Miao%2C+P">Pu Miao</a>, <a href="/search/eess?searchtype=author&amp;query=Chen%2C+P">Peng Chen</a>, <a 
href="/search/eess?searchtype=author&amp;query=Xue%2C+L">Lei Xue</a>, <a href="/search/eess?searchtype=author&amp;query=Zheng%2C+L">Li-Yang Zheng</a>, <a href="/search/eess?searchtype=author&amp;query=Wang%2C+L">Laiyuan Wang</a>, <a href="/search/eess?searchtype=author&amp;query=Chen%2C+G">Gaojie Chen</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2409.06577v1-abstract-short" style="display: inline;"> Differential spatial modulation (DSM) exploits the time dimension to facilitate the differential modulation, which can perfectly avoid the challenge in acquiring of heavily entangled channel state information of visible light communication (VLC) system. However, it has huge search space and high complexity for large number of transmitters. In this paper, a novel vector correction (VC)-based orthog&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2409.06577v1-abstract-full').style.display = 'inline'; document.getElementById('2409.06577v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2409.06577v1-abstract-full" style="display: none;"> Differential spatial modulation (DSM) exploits the time dimension to facilitate the differential modulation, which can perfectly avoid the challenge in acquiring of heavily entangled channel state information of visible light communication (VLC) system. However, it has huge search space and high complexity for large number of transmitters. In this paper, a novel vector correction (VC)-based orthogonal matching pursuit (OMP) detection algorithm is proposed to reduce the complexity, which exploits the sparsity and relativity of all transmitters, and then employs a novel correction criterion by correcting the index vectors of the error estimation for improving the demodulation performance. 
To overcome the local optimum dilemma in the atoms searching, an OMP-assisted genetic algorithm is also proposed to further improve the bit error rate (BER) performance of the VLC-DSM system. Simulation results demonstrate that the proposed schemes can significantly reduce the computational complexity at least by 62.5% while achieving an excellent BER performance as compared with traditional maximum likelihood based receiver. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2409.06577v1-abstract-full').style.display = 'none'; document.getElementById('2409.06577v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 10 September, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> September 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">This paper has been accepted by 2024 IEEE 24th International Conference on Communication Technology (ICCT 2024)</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2409.00130">arXiv:2409.00130</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2409.00130">pdf</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Signal Processing">eess.SP</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> Mirror contrastive loss based sliding window transformer for subject-independent motor imagery based EEG signal recognition </p> <p class="authors"> <span 
class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&amp;query=Luo%2C+J">Jing Luo</a>, <a href="/search/eess?searchtype=author&amp;query=Mao%2C+Q">Qi Mao</a>, <a href="/search/eess?searchtype=author&amp;query=Shi%2C+W">Weiwei Shi</a>, <a href="/search/eess?searchtype=author&amp;query=Shi%2C+Z">Zhenghao Shi</a>, <a href="/search/eess?searchtype=author&amp;query=Wang%2C+X">Xiaofan Wang</a>, <a href="/search/eess?searchtype=author&amp;query=Lu%2C+X">Xiaofeng Lu</a>, <a href="/search/eess?searchtype=author&amp;query=Hei%2C+X">Xinhong Hei</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2409.00130v1-abstract-short" style="display: inline;"> While deep learning models have been extensively utilized in motor imagery based EEG signal recognition, they often operate as black boxes. Motivated by neurological findings indicating that the mental imagery of left or right-hand movement induces event-related desynchronization (ERD) in the contralateral sensorimotor area of the brain, we propose a Mirror Contrastive Loss based Sliding Window Tr&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2409.00130v1-abstract-full').style.display = 'inline'; document.getElementById('2409.00130v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2409.00130v1-abstract-full" style="display: none;"> While deep learning models have been extensively utilized in motor imagery based EEG signal recognition, they often operate as black boxes. 
Motivated by neurological findings indicating that the mental imagery of left or right-hand movement induces event-related desynchronization (ERD) in the contralateral sensorimotor area of the brain, we propose a Mirror Contrastive Loss based Sliding Window Transformer (MCL-SWT) to enhance subject-independent motor imagery-based EEG signal recognition. Specifically, our proposed mirror contrastive loss enhances sensitivity to the spatial location of ERD by contrasting the original EEG signals with their mirror counterparts-mirror EEG signals generated by interchanging the channels of the left and right hemispheres of the EEG signals. Moreover, we introduce a temporal sliding window transformer that computes self-attention scores from high temporal resolution features, thereby improving model performance with manageable computational complexity. We evaluate the performance of MCL-SWT on subject-independent motor imagery EEG signal recognition tasks, and our experimental results demonstrate that MCL-SWT achieved accuracies of 66.48% and 75.62%, surpassing the state-of-the-art (SOTA) model by 2.82% and 2.17%, respectively. Furthermore, ablation experiments confirm the effectiveness of the proposed mirror contrastive loss. A code demo of MCL-SWT is available at https://github.com/roniusLuo/MCL_SWT. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2409.00130v1-abstract-full').style.display = 'none'; document.getElementById('2409.00130v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 29 August, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> September 2024. 
</p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">This paper has been accepted by the Fourth International Workshop on Human Brain and Artificial Intelligence, joint workshop of the 33rd International Joint Conference on Artificial Intelligence, Jeju Island, South Korea, from August 3rd to August 9th, 2024</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2408.05645">arXiv:2408.05645</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2408.05645">pdf</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Image and Video Processing">eess.IV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> BeyondCT: A deep learning model for predicting pulmonary function from chest CT scans </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&amp;query=Geng%2C+K">Kaiwen Geng</a>, <a href="/search/eess?searchtype=author&amp;query=Shi%2C+Z">Zhiyi Shi</a>, <a href="/search/eess?searchtype=author&amp;query=Zhao%2C+X">Xiaoyan Zhao</a>, <a href="/search/eess?searchtype=author&amp;query=Ali%2C+A">Alaa Ali</a>, <a href="/search/eess?searchtype=author&amp;query=Wang%2C+J">Jing Wang</a>, <a href="/search/eess?searchtype=author&amp;query=Leader%2C+J">Joseph Leader</a>, <a href="/search/eess?searchtype=author&amp;query=Pu%2C+J">Jiantao Pu</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2408.05645v1-abstract-short" style="display: 
inline;"> Abstract Background: Pulmonary function tests (PFTs) and computed tomography (CT) imaging are vital in diagnosing, managing, and monitoring lung diseases. A common issue in practice is the lack of access to recorded pulmonary functions despite available chest CT scans. Purpose: To develop and validate a deep learning algorithm for predicting pulmonary function directly from chest CT scans. M&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2408.05645v1-abstract-full').style.display = 'inline'; document.getElementById('2408.05645v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2408.05645v1-abstract-full" style="display: none;"> Abstract Background: Pulmonary function tests (PFTs) and computed tomography (CT) imaging are vital in diagnosing, managing, and monitoring lung diseases. A common issue in practice is the lack of access to recorded pulmonary functions despite available chest CT scans. Purpose: To develop and validate a deep learning algorithm for predicting pulmonary function directly from chest CT scans. Methods: The development cohort came from the Pittsburgh Lung Screening Study (PLuSS) (n=3619). The validation cohort came from the Specialized Centers of Clinically Oriented Research (SCCOR) in COPD (n=662). A deep learning model called BeyondCT, combining a three-dimensional (3D) convolutional neural network (CNN) and Vision Transformer (ViT) architecture, was used to predict forced vital capacity (FVC) and forced expiratory volume in one second (FEV1) from non-contrasted inspiratory chest CT scans. A 3D CNN model without ViT was used for comparison. Subject demographics (age, gender, smoking status) were also incorporated into the model. Performance was compared to actual PFTs using mean absolute error (MAE, L), percentage error, and R square. 
Results: The 3D-CNN model achieved MAEs of 0.395 L and 0.383 L, percentage errors of 13.84% and 18.85%, and R square of 0.665 and 0.679 for FVC and FEV1, respectively. The BeyondCT model without demographics had MAEs of 0.362 L and 0.371 L, percentage errors of 10.89% and 14.96%, and R square of 0.719 and 0.727, respectively. Including demographics improved performance (p&lt;0.05), with MAEs of 0.356 L and 0.353 L, percentage errors of 10.79% and 14.82%, and R square of 0.77 and 0.739 for FVC and FEV1 in the test set. Conclusion: The BeyondCT model showed robust performance in predicting lung function from non-contrast inspiratory chest CT scans. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2408.05645v1-abstract-full').style.display = 'none'; document.getElementById('2408.05645v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 10 August, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> August 2024. 
</p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">5 tables, 7 figures,22 pages</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2408.02095">arXiv:2408.02095</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2408.02095">pdf</a>, <a href="https://arxiv.org/format/2408.02095">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Information Theory">cs.IT</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Signal Processing">eess.SP</span> </div> </div> <p class="title is-5 mathjax"> Secure Semantic Communications: From Perspective of Physical Layer Security </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&amp;query=Li%2C+Y">Yongkang Li</a>, <a href="/search/eess?searchtype=author&amp;query=Shi%2C+Z">Zheng Shi</a>, <a href="/search/eess?searchtype=author&amp;query=Hu%2C+H">Han Hu</a>, <a href="/search/eess?searchtype=author&amp;query=Fu%2C+Y">Yaru Fu</a>, <a href="/search/eess?searchtype=author&amp;query=Wang%2C+H">Hong Wang</a>, <a href="/search/eess?searchtype=author&amp;query=Lei%2C+H">Hongjiang Lei</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2408.02095v1-abstract-short" style="display: inline;"> Semantic communications have been envisioned as a potential technique that goes beyond Shannon paradigm. Unlike modern communications that provide bit-level security, the eaves-dropping of semantic communications poses a significant risk of potentially exposing intention of legitimate user. 
To address this challenge, a novel deep neural network (DNN) enabled secure semantic communication (DeepSSC)&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2408.02095v1-abstract-full').style.display = 'inline'; document.getElementById('2408.02095v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2408.02095v1-abstract-full" style="display: none;"> Semantic communications have been envisioned as a potential technique that goes beyond Shannon paradigm. Unlike modern communications that provide bit-level security, the eaves-dropping of semantic communications poses a significant risk of potentially exposing intention of legitimate user. To address this challenge, a novel deep neural network (DNN) enabled secure semantic communication (DeepSSC) system is developed by capitalizing on physical layer security. To balance the tradeoff between security and reliability, a two-phase training method for DNNs is devised. Particularly, Phase I aims at semantic recovery of legitimate user, while Phase II attempts to minimize the leakage of semantic information to eavesdroppers. The loss functions of DeepSSC in Phases I and II are respectively designed according to Shannon capacity and secure channel capacity, which are approximated with variational inference. Moreover, we define the metric of secure bilingual evaluation understudy (S-BLEU) to assess the security of semantic communications. Finally, simulation results demonstrate that DeepSSC achieves a significant boost to semantic security particularly in high signal-to-noise ratio regime, despite a minor degradation of reliability. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2408.02095v1-abstract-full').style.display = 'none'; document.getElementById('2408.02095v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 4 August, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> August 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2407.20518">arXiv:2407.20518</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2407.20518">pdf</a>, <a href="https://arxiv.org/format/2407.20518">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Image and Video Processing">eess.IV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> High-Resolution Spatial Transcriptomics from Histology Images using HisToSGE </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&amp;query=Shi%2C+Z">Zhiceng Shi</a>, <a href="/search/eess?searchtype=author&amp;query=Xue%2C+S">Shuailin Xue</a>, <a href="/search/eess?searchtype=author&amp;query=Zhu%2C+F">Fangfang Zhu</a>, <a href="/search/eess?searchtype=author&amp;query=Min%2C+W">Wenwen Min</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2407.20518v1-abstract-short" style="display: inline;"> Spatial transcriptomics (ST) is a groundbreaking genomic technology that enables spatial localization analysis of gene 
expression within tissue sections. However, it is significantly limited by high costs and sparse spatial resolution. An alternative, more cost-effective strategy is to use deep learning methods to predict high-density gene expression profiles from histological images. However, exi&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2407.20518v1-abstract-full').style.display = 'inline'; document.getElementById('2407.20518v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2407.20518v1-abstract-full" style="display: none;"> Spatial transcriptomics (ST) is a groundbreaking genomic technology that enables spatial localization analysis of gene expression within tissue sections. However, it is significantly limited by high costs and sparse spatial resolution. An alternative, more cost-effective strategy is to use deep learning methods to predict high-density gene expression profiles from histological images. However, existing methods struggle to capture rich image features effectively or rely on low-dimensional positional coordinates, making it difficult to accurately predict high-resolution gene expression profiles. To address these limitations, we developed HisToSGE, a method that employs a Pathology Image Large Model (PILM) to extract rich image features from histological images and utilizes a feature learning module to robustly generate high-resolution gene expression profiles. We evaluated HisToSGE on four ST datasets, comparing its performance with five state-of-the-art baseline methods. The results demonstrate that HisToSGE excels in generating high-resolution gene expression profiles and performing downstream tasks such as spatial domain identification. All code and public datasets used in this paper are available at https://github.com/wenwenmin/HisToSGE and https://zenodo.org/records/12792163. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2407.20518v1-abstract-full').style.display = 'none'; document.getElementById('2407.20518v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 29 July, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> July 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2407.17902">arXiv:2407.17902</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2407.17902">pdf</a>, <a href="https://arxiv.org/format/2407.17902">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Audio and Speech Processing">eess.AS</span> </div> </div> <p class="title is-5 mathjax"> Multi-Stage Face-Voice Association Learning with Keynote Speaker Diarization </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&amp;query=Tao%2C+R">Ruijie Tao</a>, <a href="/search/eess?searchtype=author&amp;query=Shi%2C+Z">Zhan Shi</a>, <a href="/search/eess?searchtype=author&amp;query=Jiang%2C+Y">Yidi Jiang</a>, <a href="/search/eess?searchtype=author&amp;query=Truong%2C+D">Duc-Tuan Truong</a>, <a href="/search/eess?searchtype=author&amp;query=Chng%2C+E">Eng-Siong Chng</a>, <a href="/search/eess?searchtype=author&amp;query=Alioto%2C+M">Massimo Alioto</a>, <a href="/search/eess?searchtype=author&amp;query=Li%2C+H">Haizhou Li</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2407.17902v1-abstract-short" style="display: inline;"> The human brain has the capability to associate the unknown person&#39;s voice and face by leveraging their general 
relationship, referred to as ``cross-modal speaker verification&#39;&#39;. This task poses significant challenges due to the complex relationship between the modalities. In this paper, we propose a ``Multi-stage Face-voice Association Learning with Keynote Speaker Diarization&#39;&#39;~(MFV-KSD) framewo&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2407.17902v1-abstract-full').style.display = 'inline'; document.getElementById('2407.17902v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2407.17902v1-abstract-full" style="display: none;"> The human brain has the capability to associate the unknown person&#39;s voice and face by leveraging their general relationship, referred to as ``cross-modal speaker verification&#39;&#39;. This task poses significant challenges due to the complex relationship between the modalities. In this paper, we propose a ``Multi-stage Face-voice Association Learning with Keynote Speaker Diarization&#39;&#39;~(MFV-KSD) framework. MFV-KSD contains a keynote speaker diarization front-end to effectively address the noisy speech inputs issue. To balance and enhance the intra-modal feature learning and inter-modal correlation understanding, MFV-KSD utilizes a novel three-stage training strategy. Our experimental results demonstrated robust performance, achieving the first rank in the 2024 Face-voice Association in Multilingual Environments (FAME) challenge with an overall Equal Error Rate (EER) of 19.9%. Details can be found in https://github.com/TaoRuijie/MFV-KSD. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2407.17902v1-abstract-full').style.display = 'none'; document.getElementById('2407.17902v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 25 July, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> July 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2407.10427">arXiv:2407.10427</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2407.10427">pdf</a>, <a href="https://arxiv.org/format/2407.10427">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Image and Video Processing">eess.IV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> Transformer for Multitemporal Hyperspectral Image Unmixing </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&amp;query=Li%2C+H">Hang Li</a>, <a href="/search/eess?searchtype=author&amp;query=Dong%2C+Q">Qiankun Dong</a>, <a href="/search/eess?searchtype=author&amp;query=Xie%2C+X">Xueshuo Xie</a>, <a href="/search/eess?searchtype=author&amp;query=Xu%2C+X">Xia Xu</a>, <a href="/search/eess?searchtype=author&amp;query=Li%2C+T">Tao Li</a>, <a href="/search/eess?searchtype=author&amp;query=Shi%2C+Z">Zhenwei Shi</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2407.10427v1-abstract-short" style="display: inline;"> Multitemporal hyperspectral image unmixing (MTHU) holds significant importance in monitoring and analyzing the dynamic 
changes of surface. However, compared to single-temporal unmixing, the multitemporal approach demands comprehensive consideration of information across different phases, rendering it a greater challenge. To address this challenge, we propose the Multitemporal Hyperspectral Image U&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2407.10427v1-abstract-full').style.display = 'inline'; document.getElementById('2407.10427v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2407.10427v1-abstract-full" style="display: none;"> Multitemporal hyperspectral image unmixing (MTHU) holds significant importance in monitoring and analyzing the dynamic changes of surface. However, compared to single-temporal unmixing, the multitemporal approach demands comprehensive consideration of information across different phases, rendering it a greater challenge. To address this challenge, we propose the Multitemporal Hyperspectral Image Unmixing Transformer (MUFormer), an end-to-end unsupervised deep learning model. To effectively perform multitemporal hyperspectral image unmixing, we introduce two key modules: the Global Awareness Module (GAM) and the Change Enhancement Module (CEM). The Global Awareness Module computes self-attention across all phases, facilitating global weight allocation. On the other hand, the Change Enhancement Module dynamically learns local temporal changes by comparing endmember changes between adjacent phases. The synergy between these modules allows for capturing semantic information regarding endmember and abundance changes, thereby enhancing the effectiveness of multitemporal hyperspectral image unmixing. We conducted experiments on one real dataset and two synthetic datasets, demonstrating that our model significantly enhances the effect of multitemporal hyperspectral image unmixing. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2407.10427v1-abstract-full').style.display = 'none'; document.getElementById('2407.10427v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 15 July, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> July 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2407.08216">arXiv:2407.08216</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2407.08216">pdf</a>, <a href="https://arxiv.org/format/2407.08216">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Image and Video Processing">eess.IV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Quantitative Methods">q-bio.QM</span> </div> </div> <p class="title is-5 mathjax"> Multimodal contrastive learning for spatial gene expression prediction using histology images </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&amp;query=Min%2C+W">Wenwen Min</a>, <a href="/search/eess?searchtype=author&amp;query=Shi%2C+Z">Zhiceng Shi</a>, <a href="/search/eess?searchtype=author&amp;query=Zhang%2C+J">Jun Zhang</a>, <a href="/search/eess?searchtype=author&amp;query=Wan%2C+J">Jun Wan</a>, <a href="/search/eess?searchtype=author&amp;query=Wang%2C+C">Changmiao Wang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short 
has-text-grey-dark mathjax" id="2407.08216v1-abstract-short" style="display: inline;"> In recent years, the advent of spatial transcriptomics (ST) technology has unlocked unprecedented opportunities for delving into the complexities of gene expression patterns within intricate biological systems. Despite its transformative potential, the prohibitive cost of ST technology remains a significant barrier to its widespread adoption in large-scale studies. An alternative, more cost-effect&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2407.08216v1-abstract-full').style.display = 'inline'; document.getElementById('2407.08216v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2407.08216v1-abstract-full" style="display: none;"> In recent years, the advent of spatial transcriptomics (ST) technology has unlocked unprecedented opportunities for delving into the complexities of gene expression patterns within intricate biological systems. Despite its transformative potential, the prohibitive cost of ST technology remains a significant barrier to its widespread adoption in large-scale studies. An alternative, more cost-effective strategy involves employing artificial intelligence to predict gene expression levels using readily accessible whole-slide images (WSIs) stained with Hematoxylin and Eosin (H\&amp;E). However, existing methods have yet to fully capitalize on multimodal information provided by H&amp;E images and ST data with spatial location. In this paper, we propose \textbf{mclSTExp}, a multimodal contrastive learning with Transformer and Densenet-121 encoder for Spatial Transcriptomics Expression prediction. We conceptualize each spot as a &#34;word&#34;, integrating its intrinsic features with spatial context through the self-attention mechanism of a Transformer encoder. 
This integration is further enriched by incorporating image features via contrastive learning, thereby enhancing the predictive capability of our model. Our extensive evaluation of \textbf{mclSTExp} on two breast cancer datasets and a skin squamous cell carcinoma dataset demonstrates its superior performance in predicting spatial gene expression. Moreover, mclSTExp has shown promise in interpreting cancer-specific overexpressed genes, elucidating immune-related genes, and identifying specialized spatial domains annotated by pathologists. Our source code is available at https://github.com/shizhiceng/mclSTExp. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2407.08216v1-abstract-full').style.display = 'none'; document.getElementById('2407.08216v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 11 July, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> July 2024. 
</p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">BIB, Code: https://github.com/shizhiceng/mclSTExp</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2407.00987">arXiv:2407.00987</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2407.00987">pdf</a>, <a href="https://arxiv.org/format/2407.00987">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Networking and Internet Architecture">cs.NI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Systems and Control">eess.SY</span> </div> </div> <p class="title is-5 mathjax"> Exploiting Dependency-Aware Priority Adjustment for Mixed-Criticality TSN Flow Scheduling </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&amp;query=Guo%2C+M">Miao Guo</a>, <a href="/search/eess?searchtype=author&amp;query=Sun%2C+Y">Yifei Sun</a>, <a href="/search/eess?searchtype=author&amp;query=Gu%2C+C">Chaojie Gu</a>, <a href="/search/eess?searchtype=author&amp;query=He%2C+S">Shibo He</a>, <a href="/search/eess?searchtype=author&amp;query=Shi%2C+Z">Zhiguo Shi</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2407.00987v1-abstract-short" style="display: inline;"> Time-Sensitive Networking (TSN) serves as a one-size-fits-all solution for mixed-criticality communication, in which flow scheduling is vital to guarantee real-time transmissions. Traditional approaches statically assign priorities to flows based on their associated applications, resulting in significant queuing delays. 
In this paper, we observe that assigning different priorities to a flow leads&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2407.00987v1-abstract-full').style.display = 'inline'; document.getElementById('2407.00987v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2407.00987v1-abstract-full" style="display: none;"> Time-Sensitive Networking (TSN) serves as a one-size-fits-all solution for mixed-criticality communication, in which flow scheduling is vital to guarantee real-time transmissions. Traditional approaches statically assign priorities to flows based on their associated applications, resulting in significant queuing delays. In this paper, we observe that assigning different priorities to a flow leads to varying delays due to different shaping mechanisms applied to different flow types. Leveraging this insight, we introduce a new scheduling method in mixed-criticality TSN that incorporates a priority adjustment scheme among diverse flow types to mitigate queuing delays and enhance schedulability. Specifically, we propose dependency-aware priority adjustment algorithms tailored to different link-overlapping conditions. Experiments in various settings validate the effectiveness of the proposed method, which enhances the schedulability by 20.57% compared with the SOTA method. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2407.00987v1-abstract-full').style.display = 'none'; document.getElementById('2407.00987v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 1 July, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> July 2024. 
</p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">This paper has been accepted by IWQoS&#39;24</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2406.19043">arXiv:2406.19043</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2406.19043">pdf</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Image and Video Processing">eess.IV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Databases">cs.DB</span> </div> </div> <p class="title is-5 mathjax"> CMRxRecon2024: A Multi-Modality, Multi-View K-Space Dataset Boosting Universal Machine Learning for Accelerated Cardiac MRI </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&amp;query=Wang%2C+Z">Zi Wang</a>, <a href="/search/eess?searchtype=author&amp;query=Wang%2C+F">Fanwen Wang</a>, <a href="/search/eess?searchtype=author&amp;query=Qin%2C+C">Chen Qin</a>, <a href="/search/eess?searchtype=author&amp;query=Lyu%2C+J">Jun Lyu</a>, <a href="/search/eess?searchtype=author&amp;query=Ouyang%2C+C">Cheng Ouyang</a>, <a href="/search/eess?searchtype=author&amp;query=Wang%2C+S">Shuo Wang</a>, <a href="/search/eess?searchtype=author&amp;query=Li%2C+Y">Yan Li</a>, <a href="/search/eess?searchtype=author&amp;query=Yu%2C+M">Mengyao Yu</a>, <a href="/search/eess?searchtype=author&amp;query=Zhang%2C+H">Haoyu Zhang</a>, <a href="/search/eess?searchtype=author&amp;query=Guo%2C+K">Kunyuan Guo</a>, <a href="/search/eess?searchtype=author&amp;query=Shi%2C+Z">Zhang 
Shi</a>, <a href="/search/eess?searchtype=author&amp;query=Li%2C+Q">Qirong Li</a>, <a href="/search/eess?searchtype=author&amp;query=Xu%2C+Z">Ziqiang Xu</a>, <a href="/search/eess?searchtype=author&amp;query=Zhang%2C+Y">Yajing Zhang</a>, <a href="/search/eess?searchtype=author&amp;query=Li%2C+H">Hao Li</a>, <a href="/search/eess?searchtype=author&amp;query=Hua%2C+S">Sha Hua</a>, <a href="/search/eess?searchtype=author&amp;query=Chen%2C+B">Binghua Chen</a>, <a href="/search/eess?searchtype=author&amp;query=Sun%2C+L">Longyu Sun</a>, <a href="/search/eess?searchtype=author&amp;query=Sun%2C+M">Mengting Sun</a>, <a href="/search/eess?searchtype=author&amp;query=Li%2C+Q">Qin Li</a>, <a href="/search/eess?searchtype=author&amp;query=Chu%2C+Y">Ying-Hua Chu</a>, <a href="/search/eess?searchtype=author&amp;query=Bai%2C+W">Wenjia Bai</a>, <a href="/search/eess?searchtype=author&amp;query=Qin%2C+J">Jing Qin</a>, <a href="/search/eess?searchtype=author&amp;query=Zhuang%2C+X">Xiahai Zhuang</a>, <a href="/search/eess?searchtype=author&amp;query=Prieto%2C+C">Claudia Prieto</a> , et al. (7 additional authors not shown) </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2406.19043v2-abstract-short" style="display: inline;"> Cardiac magnetic resonance imaging (MRI) has emerged as a clinically gold-standard technique for diagnosing cardiac diseases, thanks to its ability to provide diverse information with multiple modalities and anatomical views. 
Accelerated cardiac MRI is highly expected to achieve time-efficient and patient-friendly imaging, and then advanced image reconstruction approaches are required to recover h&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2406.19043v2-abstract-full').style.display = 'inline'; document.getElementById('2406.19043v2-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2406.19043v2-abstract-full" style="display: none;"> Cardiac magnetic resonance imaging (MRI) has emerged as a clinically gold-standard technique for diagnosing cardiac diseases, thanks to its ability to provide diverse information with multiple modalities and anatomical views. Accelerated cardiac MRI is highly expected to achieve time-efficient and patient-friendly imaging, and then advanced image reconstruction approaches are required to recover high-quality, clinically interpretable images from undersampled measurements. However, the lack of publicly available cardiac MRI k-space dataset in terms of both quantity and diversity has severely hindered substantial technological progress, particularly for data-driven artificial intelligence. Here, we provide a standardized, diverse, and high-quality CMRxRecon2024 dataset to facilitate the technical development, fair evaluation, and clinical transfer of cardiac MRI reconstruction approaches, towards promoting the universal frameworks that enable fast and robust reconstructions across different cardiac MRI protocols in clinical practice. To the best of our knowledge, the CMRxRecon2024 dataset is the largest and most protocol-diverse publicly available cardiac k-space dataset. It is acquired from 330 healthy volunteers, covering commonly used modalities, anatomical views, and acquisition trajectories in clinical cardiac MRI workflows. 
Besides, an open platform with tutorials, benchmarks, and data processing tools is provided to facilitate data usage, advanced method development, and fair performance evaluation. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2406.19043v2-abstract-full').style.display = 'none'; document.getElementById('2406.19043v2-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 16 January, 2025; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 27 June, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> June 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">23 pages, 3 figures, 2 tables</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2406.18993">arXiv:2406.18993</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2406.18993">pdf</a>, <a href="https://arxiv.org/ps/2406.18993">ps</a>, <a href="https://arxiv.org/format/2406.18993">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Signal Processing">eess.SP</span> </div> </div> <p class="title is-5 mathjax"> Interference Cancellation Based Neural Receiver for Superimposed Pilot in Multi-Layer Transmission </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&amp;query=Xiao%2C+H">Han Xiao</a>, <a href="/search/eess?searchtype=author&amp;query=Tian%2C+W">Wenqiang Tian</a>, <a href="/search/eess?searchtype=author&amp;query=Jin%2C+S">Shi Jin</a>, <a href="/search/eess?searchtype=author&amp;query=Liu%2C+W">Wendong Liu</a>, <a href="/search/eess?searchtype=author&amp;query=Shen%2C+J">Jia 
Shen</a>, <a href="/search/eess?searchtype=author&amp;query=Shi%2C+Z">Zhihua Shi</a>, <a href="/search/eess?searchtype=author&amp;query=Zhang%2C+Z">Zhi Zhang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2406.18993v1-abstract-short" style="display: inline;"> In this paper, an interference cancellation based neural receiver for superimposed pilot (SIP) in multi-layer transmission is proposed, where the data and pilot are non-orthogonally superimposed in the same time-frequency resource. Specifically, to deal with the intra-layer and inter-layer interference of SIP under multi-layer transmission, the interference cancellation with superimposed symbol ai&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2406.18993v1-abstract-full').style.display = 'inline'; document.getElementById('2406.18993v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2406.18993v1-abstract-full" style="display: none;"> In this paper, an interference cancellation based neural receiver for superimposed pilot (SIP) in multi-layer transmission is proposed, where the data and pilot are non-orthogonally superimposed in the same time-frequency resource. Specifically, to deal with the intra-layer and inter-layer interference of SIP under multi-layer transmission, the interference cancellation with superimposed symbol aided channel estimation is leveraged in the neural receiver, accompanied by the pre-design of pilot code-division orthogonal mechanism at transmitter. 
In addition, to address the complexity issue for inter-vendor collaboration and the generalization problem in practical deployments, respectively, this paper also provides a fixed SIP (F-SIP) design based on constant pilot power ratio and scalable mechanisms for different modulation and coding schemes (MCSs) and transmission layers. Simulation results demonstrate the superiority of the proposed schemes on the performance of block error rate and throughput compared with existing counterparts. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2406.18993v1-abstract-full').style.display = 'none'; document.getElementById('2406.18993v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 27 June, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> June 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2404.15284">arXiv:2404.15284</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2404.15284">pdf</a>, <a href="https://arxiv.org/format/2404.15284">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Signal Processing">eess.SP</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> </div> </div> <p class="title is-5 mathjax"> Global 4D Ionospheric STEC Prediction based on DeepONet for GNSS Rays </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&amp;query=Cai%2C+D">Dijia Cai</a>, <a href="/search/eess?searchtype=author&amp;query=Shi%2C+Z">Zenghui Shi</a>, <a href="/search/eess?searchtype=author&amp;query=Fu%2C+H">Haiyang Fu</a>, <a href="/search/eess?searchtype=author&amp;query=Liu%2C+H">Huan Liu</a>, <a 
href="/search/eess?searchtype=author&amp;query=Qian%2C+H">Hongyi Qian</a>, <a href="/search/eess?searchtype=author&amp;query=Sui%2C+Y">Yun Sui</a>, <a href="/search/eess?searchtype=author&amp;query=Xu%2C+F">Feng Xu</a>, <a href="/search/eess?searchtype=author&amp;query=Jin%2C+Y">Ya-Qiu Jin</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2404.15284v1-abstract-short" style="display: inline;"> The ionosphere is a vitally dynamic charged particle region in the Earth&#39;s upper atmosphere, playing a crucial role in applications such as radio communication and satellite navigation. The Slant Total Electron Contents (STEC) is an important parameter for characterizing wave propagation, representing the integrated electron density along the ray of radio signals passing through the ionosphere. Th&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2404.15284v1-abstract-full').style.display = 'inline'; document.getElementById('2404.15284v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2404.15284v1-abstract-full" style="display: none;"> The ionosphere is a vitally dynamic charged particle region in the Earth&#39;s upper atmosphere, playing a crucial role in applications such as radio communication and satellite navigation. The Slant Total Electron Contents (STEC) is an important parameter for characterizing wave propagation, representing the integrated electron density along the ray of radio signals passing through the ionosphere. The accurate prediction of STEC is essential for mitigating the ionospheric impact particularly on Global Navigation Satellite Systems (GNSS). 
In this work, we propose a high-precision STEC prediction model named DeepONet-STEC, which learns nonlinear operators to predict the 4D temporal-spatial integrated parameter for specified ground station - satellite ray path globally. As a demonstration, we validate the performance of the model based on GNSS observation data for global and US-CORS regimes under ionospheric quiet and storm conditions. The DeepONet-STEC model results show that the three-day 72 hour prediction in quiet periods could achieve high accuracy using observation data by the Precise Point Positioning (PPP) with temporal resolution 30s. Under active solar magnetic storm periods, the DeepONet-STEC also demonstrated its robustness and superiority over traditional deep learning methods. This work presents a neural operator regression architecture for predicting the 4D temporal-spatial ionospheric parameter for satellite navigation system performance, which may be further extended for various space applications and beyond. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2404.15284v1-abstract-full').style.display = 'none'; document.getElementById('2404.15284v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 12 March, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> April 2024. 
</p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2404.07956">arXiv:2404.07956</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2404.07956">pdf</a>, <a href="https://arxiv.org/format/2404.07956">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Robotics">cs.RO</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Systems and Control">eess.SY</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Optimization and Control">math.OC</span> </div> </div> <p class="title is-5 mathjax"> Lyapunov-stable Neural Control for State and Output Feedback: A Novel Formulation </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&amp;query=Yang%2C+L">Lujie Yang</a>, <a href="/search/eess?searchtype=author&amp;query=Dai%2C+H">Hongkai Dai</a>, <a href="/search/eess?searchtype=author&amp;query=Shi%2C+Z">Zhouxing Shi</a>, <a href="/search/eess?searchtype=author&amp;query=Hsieh%2C+C">Cho-Jui Hsieh</a>, <a href="/search/eess?searchtype=author&amp;query=Tedrake%2C+R">Russ Tedrake</a>, <a href="/search/eess?searchtype=author&amp;query=Zhang%2C+H">Huan Zhang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2404.07956v2-abstract-short" style="display: inline;"> Learning-based neural network (NN) control policies have shown impressive empirical performance in a wide range of tasks in robotics and control. 
However, formal (Lyapunov) stability guarantees over the region-of-attraction (ROA) for NN controllers with nonlinear dynamical systems are challenging to obtain, and most existing approaches rely on expensive solvers such as sums-of-squares (SOS), mixed&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2404.07956v2-abstract-full').style.display = 'inline'; document.getElementById('2404.07956v2-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2404.07956v2-abstract-full" style="display: none;"> Learning-based neural network (NN) control policies have shown impressive empirical performance in a wide range of tasks in robotics and control. However, formal (Lyapunov) stability guarantees over the region-of-attraction (ROA) for NN controllers with nonlinear dynamical systems are challenging to obtain, and most existing approaches rely on expensive solvers such as sums-of-squares (SOS), mixed-integer programming (MIP), or satisfiability modulo theories (SMT). In this paper, we demonstrate a new framework for learning NN controllers together with Lyapunov certificates using fast empirical falsification and strategic regularizations. We propose a novel formulation that defines a larger verifiable region-of-attraction (ROA) than shown in the literature, and refines the conventional restrictive constraints on Lyapunov derivatives to focus only on certifiable ROAs. The Lyapunov condition is rigorously verified post-hoc using branch-and-bound with scalable linear bound propagation-based NN verification techniques. The approach is efficient and flexible, and the full training and verification procedure is accelerated on GPUs without relying on expensive solvers for SOS, MIP, nor SMT. 
The flexibility and efficiency of our framework allow us to demonstrate Lyapunov-stable output feedback control with synthesized NN-based controllers and NN-based observers with formal stability guarantees, for the first time in literature. Source code at https://github.com/Verified-Intelligence/Lyapunov_Stable_NN_Controllers <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2404.07956v2-abstract-full').style.display = 'none'; document.getElementById('2404.07956v2-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 4 June, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 11 April, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> April 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Paper accepted by ICML 2024</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2404.01082">arXiv:2404.01082</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2404.01082">pdf</a>, <a href="https://arxiv.org/format/2404.01082">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Image and Video Processing">eess.IV</span> </div> </div> <p class="title is-5 mathjax"> The state-of-the-art in Cardiac MRI Reconstruction: Results of the CMRxRecon Challenge in MICCAI 2023 </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&amp;query=Lyu%2C+J">Jun Lyu</a>, <a href="/search/eess?searchtype=author&amp;query=Qin%2C+C">Chen Qin</a>, <a href="/search/eess?searchtype=author&amp;query=Wang%2C+S">Shuo Wang</a>, <a 
href="/search/eess?searchtype=author&amp;query=Wang%2C+F">Fanwen Wang</a>, <a href="/search/eess?searchtype=author&amp;query=Li%2C+Y">Yan Li</a>, <a href="/search/eess?searchtype=author&amp;query=Wang%2C+Z">Zi Wang</a>, <a href="/search/eess?searchtype=author&amp;query=Guo%2C+K">Kunyuan Guo</a>, <a href="/search/eess?searchtype=author&amp;query=Ouyang%2C+C">Cheng Ouyang</a>, <a href="/search/eess?searchtype=author&amp;query=T%C3%A4nzer%2C+M">Michael Tänzer</a>, <a href="/search/eess?searchtype=author&amp;query=Liu%2C+M">Meng Liu</a>, <a href="/search/eess?searchtype=author&amp;query=Sun%2C+L">Longyu Sun</a>, <a href="/search/eess?searchtype=author&amp;query=Sun%2C+M">Mengting Sun</a>, <a href="/search/eess?searchtype=author&amp;query=Li%2C+Q">Qin Li</a>, <a href="/search/eess?searchtype=author&amp;query=Shi%2C+Z">Zhang Shi</a>, <a href="/search/eess?searchtype=author&amp;query=Hua%2C+S">Sha Hua</a>, <a href="/search/eess?searchtype=author&amp;query=Li%2C+H">Hao Li</a>, <a href="/search/eess?searchtype=author&amp;query=Chen%2C+Z">Zhensen Chen</a>, <a href="/search/eess?searchtype=author&amp;query=Zhang%2C+Z">Zhenlin Zhang</a>, <a href="/search/eess?searchtype=author&amp;query=Xin%2C+B">Bingyu Xin</a>, <a href="/search/eess?searchtype=author&amp;query=Metaxas%2C+D+N">Dimitris N. Metaxas</a>, <a href="/search/eess?searchtype=author&amp;query=Yiasemis%2C+G">George Yiasemis</a>, <a href="/search/eess?searchtype=author&amp;query=Teuwen%2C+J">Jonas Teuwen</a>, <a href="/search/eess?searchtype=author&amp;query=Zhang%2C+L">Liping Zhang</a>, <a href="/search/eess?searchtype=author&amp;query=Chen%2C+W">Weitian Chen</a>, <a href="/search/eess?searchtype=author&amp;query=Zhao%2C+Y">Yidong Zhao</a> , et al. 
(25 additional authors not shown) </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2404.01082v2-abstract-short" style="display: inline;"> Cardiac MRI, crucial for evaluating heart structure and function, faces limitations like slow imaging and motion artifacts. Undersampling reconstruction, especially data-driven algorithms, has emerged as a promising solution to accelerate scans and enhance imaging performance using highly under-sampled data. Nevertheless, the scarcity of publicly available cardiac k-space datasets and evaluation p&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2404.01082v2-abstract-full').style.display = 'inline'; document.getElementById('2404.01082v2-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2404.01082v2-abstract-full" style="display: none;"> Cardiac MRI, crucial for evaluating heart structure and function, faces limitations like slow imaging and motion artifacts. Undersampling reconstruction, especially data-driven algorithms, has emerged as a promising solution to accelerate scans and enhance imaging performance using highly under-sampled data. Nevertheless, the scarcity of publicly available cardiac k-space datasets and evaluation platform hinder the development of data-driven reconstruction algorithms. To address this issue, we organized the Cardiac MRI Reconstruction Challenge (CMRxRecon) in 2023, in collaboration with the 26th International Conference on MICCAI. CMRxRecon presented an extensive k-space dataset comprising cine and mapping raw data, accompanied by detailed annotations of cardiac anatomical structures. With overwhelming participation, the challenge attracted more than 285 teams and over 600 participants. 
Among them, 22 teams successfully submitted Docker containers for the testing phase, with 7 teams submitted for both cine and mapping tasks. All teams use deep learning based approaches, indicating that deep learning has predominately become a promising solution for the problem. The first-place winner of both tasks utilizes the E2E-VarNet architecture as backbones. In contrast, U-Net is still the most popular backbone for both multi-coil and single-coil reconstructions. This paper provides a comprehensive overview of the challenge design, presents a summary of the submitted results, reviews the employed methods, and offers an in-depth discussion that aims to inspire future advancements in cardiac MRI reconstruction models. The summary emphasizes the effective strategies observed in Cardiac MRI reconstruction, including backbone architecture, loss function, pre-processing techniques, physical modeling, and model complexity, thereby providing valuable insights for further developments in this field. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2404.01082v2-abstract-full').style.display = 'none'; document.getElementById('2404.01082v2-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 16 April, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 1 April, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> April 2024. 
</p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">25 pages, 17 figures</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2404.00863">arXiv:2404.00863</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2404.00863">pdf</a>, <a href="https://arxiv.org/format/2404.00863">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Audio and Speech Processing">eess.AS</span> </div> </div> <p class="title is-5 mathjax"> Voice Conversion Augmentation for Speaker Recognition on Defective Datasets </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&amp;query=Tao%2C+R">Ruijie Tao</a>, <a href="/search/eess?searchtype=author&amp;query=Shi%2C+Z">Zhan Shi</a>, <a href="/search/eess?searchtype=author&amp;query=Jiang%2C+Y">Yidi Jiang</a>, <a href="/search/eess?searchtype=author&amp;query=Liu%2C+T">Tianchi Liu</a>, <a href="/search/eess?searchtype=author&amp;query=Li%2C+H">Haizhou Li</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2404.00863v1-abstract-short" style="display: inline;"> Modern speaker recognition system relies on abundant and balanced datasets for classification training. However, diverse defective datasets, such as partially-labelled, small-scale, and imbalanced datasets, are common in real-world applications. Previous works usually studied specific solutions for each scenario from the algorithm perspective. 
However, the root cause of these problems lies in data&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2404.00863v1-abstract-full').style.display = 'inline'; document.getElementById('2404.00863v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2404.00863v1-abstract-full" style="display: none;"> Modern speaker recognition system relies on abundant and balanced datasets for classification training. However, diverse defective datasets, such as partially-labelled, small-scale, and imbalanced datasets, are common in real-world applications. Previous works usually studied specific solutions for each scenario from the algorithm perspective. However, the root cause of these problems lies in dataset imperfections. To address these challenges with a unified solution, we propose the Voice Conversion Augmentation (VCA) strategy to obtain pseudo speech from the training set. Furthermore, to guarantee generation quality, we designed the VCA-NN~(nearest neighbours) strategy to select source speech from utterances that are close to the target speech in the representation space. Our experimental results on three created datasets demonstrated that VCA-NN effectively mitigates these dataset problems, which provides a new direction for handling the speaker recognition problems from the data aspect. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2404.00863v1-abstract-full').style.display = 'none'; document.getElementById('2404.00863v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 31 March, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> April 2024. 
</p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">5 pages</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2403.20198">arXiv:2403.20198</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2403.20198">pdf</a>, <a href="https://arxiv.org/format/2403.20198">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Information Theory">cs.IT</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Systems and Control">eess.SY</span> </div> </div> <p class="title is-5 mathjax"> Minimizing End-to-End Latency for Joint Source-Channel Coding Systems </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&amp;query=Chi%2C+K">Kaiyi Chi</a>, <a href="/search/eess?searchtype=author&amp;query=Yang%2C+Q">Qianqian Yang</a>, <a href="/search/eess?searchtype=author&amp;query=Shu%2C+Y">Yuanchao Shu</a>, <a href="/search/eess?searchtype=author&amp;query=Yang%2C+Z">Zhaohui Yang</a>, <a href="/search/eess?searchtype=author&amp;query=Shi%2C+Z">Zhiguo Shi</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2403.20198v1-abstract-short" style="display: inline;"> While existing studies have highlighted the advantages of deep learning (DL)-based joint source-channel coding (JSCC) schemes in enhancing transmission efficiency, they often overlook the crucial aspect of resource management during the deployment phase. In this paper, we propose an approach to minimize the transmission latency in an uplink JSCC-based system. 
We first analyze the correlation betwe&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2403.20198v1-abstract-full').style.display = 'inline'; document.getElementById('2403.20198v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2403.20198v1-abstract-full" style="display: none;"> While existing studies have highlighted the advantages of deep learning (DL)-based joint source-channel coding (JSCC) schemes in enhancing transmission efficiency, they often overlook the crucial aspect of resource management during the deployment phase. In this paper, we propose an approach to minimize the transmission latency in an uplink JSCC-based system. We first analyze the correlation between end-to-end latency and task performance, based on which the end-to-end delay model for each device is established. Then, we formulate a non-convex optimization problem aiming at minimizing the maximum end-to-end latency across all devices, which is proved to be NP-hard. We then transform the original problem into a more tractable one, from which we derive the closed form solution on the optimal compression ratio, truncation threshold selection policy, and resource allocation strategy. We further introduce a heuristic algorithm with low complexity, leveraging insights from the structure of the optimal solution. Simulation results demonstrate that both the proposed optimal algorithm and the heuristic algorithm significantly reduce end-to-end latency. Notably, the proposed heuristic algorithm achieves nearly the same performance to the optimal solution but with considerably lower computational complexity. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2403.20198v1-abstract-full').style.display = 'none'; document.getElementById('2403.20198v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 29 March, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> March 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">7 Pages, 5 Figures, accepted by 2024 IEEE ICC Workshop</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2403.18134">arXiv:2403.18134</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2403.18134">pdf</a>, <a href="https://arxiv.org/format/2403.18134">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Image and Video Processing">eess.IV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> Integrative Graph-Transformer Framework for Histopathology Whole Slide Image Representation and Classification </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&amp;query=Shi%2C+Z">Zhan Shi</a>, <a href="/search/eess?searchtype=author&amp;query=Zhang%2C+J">Jingwei Zhang</a>, <a href="/search/eess?searchtype=author&amp;query=Kong%2C+J">Jun Kong</a>, <a href="/search/eess?searchtype=author&amp;query=Wang%2C+F">Fusheng Wang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2403.18134v1-abstract-short" style="display: 
inline;"> In digital pathology, the multiple instance learning (MIL) strategy is widely used in the weakly supervised histopathology whole slide image (WSI) classification task where giga-pixel WSIs are only labeled at the slide level. However, existing attention-based MIL approaches often overlook contextual information and intrinsic spatial relationships between neighboring tissue tiles, while graph-based&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2403.18134v1-abstract-full').style.display = 'inline'; document.getElementById('2403.18134v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2403.18134v1-abstract-full" style="display: none;"> In digital pathology, the multiple instance learning (MIL) strategy is widely used in the weakly supervised histopathology whole slide image (WSI) classification task where giga-pixel WSIs are only labeled at the slide level. However, existing attention-based MIL approaches often overlook contextual information and intrinsic spatial relationships between neighboring tissue tiles, while graph-based MIL frameworks have limited power to recognize the long-range dependencies. In this paper, we introduce the integrative graph-transformer framework that simultaneously captures the context-aware relational features and global WSI representations through a novel Graph Transformer Integration (GTI) block. Specifically, each GTI block consists of a Graph Convolutional Network (GCN) layer modeling neighboring relations at the local instance level and an efficient global attention model capturing comprehensive global information from extensive feature embeddings. 
Extensive experiments on three publicly available WSI datasets: TCGA-NSCLC, TCGA-RCC and BRIGHT, demonstrate the superiority of our approach over current state-of-the-art MIL methods, achieving an improvement of 1.0% to 2.6% in accuracy and 0.7%-1.6% in AUROC. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2403.18134v1-abstract-full').style.display = 'none'; document.getElementById('2403.18134v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 26 March, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> March 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2403.13562">arXiv:2403.13562</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2403.13562">pdf</a>, <a href="https://arxiv.org/format/2403.13562">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Systems and Control">eess.SY</span> </div> </div> <p class="title is-5 mathjax"> Augmented LRFS-based Filter: Holistic Tracking of Group Objects </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&amp;query=Yang%2C+C">Chaoqun Yang</a>, <a href="/search/eess?searchtype=author&amp;query=Liang%2C+X">Xiaowei Liang</a>, <a href="/search/eess?searchtype=author&amp;query=Shi%2C+Z">Zhiguo Shi</a>, <a href="/search/eess?searchtype=author&amp;query=Zhang%2C+H">Heng Zhang</a>, <a href="/search/eess?searchtype=author&amp;query=Cao%2C+X">Xianghui Cao</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2403.13562v4-abstract-short" style="display: inline;"> This paper addresses the problem of 
group target tracking (GTT), wherein multiple closely spaced targets within a group pose a coordinated motion. To improve the tracking performance, the labeled random finite sets (LRFSs) theory is adopted, and this paper develops a new kind of LRFSs, i.e., augmented LRFSs, which introduces group information into the definition of LRFSs. Specifically, for each el&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2403.13562v4-abstract-full').style.display = 'inline'; document.getElementById('2403.13562v4-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2403.13562v4-abstract-full" style="display: none;"> This paper addresses the problem of group target tracking (GTT), wherein multiple closely spaced targets within a group pose a coordinated motion. To improve the tracking performance, the labeled random finite sets (LRFSs) theory is adopted, and this paper develops a new kind of LRFSs, i.e., augmented LRFSs, which introduces group information into the definition of LRFSs. Specifically, for each element in an LRFS, the kinetic states, track label, and the corresponding group information of its represented target are incorporated. Furthermore, by means of the labeled multi-Bernoulli (LMB) filter with the proposed augmented LRFSs, the group structure is iteratively propagated and updated during the tracking process, which achieves the simultaneously estimation of the kinetic states, track label, and the corresponding group information of multiple group targets, and further improves the GTT tracking performance. Finally, simulation experiments are provided, which well demonstrates the effectiveness of the labeled multi-Bernoulli filter with the proposed augmented LRFSs for GTT tracking. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2403.13562v4-abstract-full').style.display = 'none'; document.getElementById('2403.13562v4-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 19 August, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 20 March, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> March 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2403.01093">arXiv:2403.01093</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2403.01093">pdf</a>, <a href="https://arxiv.org/format/2403.01093">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Signal Processing">eess.SP</span> </div> </div> <p class="title is-5 mathjax"> Variational Bayesian Learning Based Localization and Channel Reconstruction in RIS-aided Systems </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&amp;query=Li%2C+Y">Yunfei Li</a>, <a href="/search/eess?searchtype=author&amp;query=Luo%2C+Y">Yiting Luo</a>, <a href="/search/eess?searchtype=author&amp;query=Wu%2C+X">Xianda Wu</a>, <a href="/search/eess?searchtype=author&amp;query=Shi%2C+Z">Zheng Shi</a>, <a href="/search/eess?searchtype=author&amp;query=Ma%2C+S">Shaodan Ma</a>, <a href="/search/eess?searchtype=author&amp;query=Yang%2C+G">Guanghua Yang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2403.01093v1-abstract-short" style="display: inline;"> The emerging immersive and autonomous services have posed stringent requirements on both communications and 
localization. By considering the great potential of reconfigurable intelligent surface (RIS), this paper focuses on the joint channel estimation and localization for RIS-aided wireless systems. As opposed to existing works that treat channel estimation and localization independently, this pa&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2403.01093v1-abstract-full').style.display = 'inline'; document.getElementById('2403.01093v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2403.01093v1-abstract-full" style="display: none;"> The emerging immersive and autonomous services have posed stringent requirements on both communications and localization. By considering the great potential of reconfigurable intelligent surface (RIS), this paper focuses on the joint channel estimation and localization for RIS-aided wireless systems. As opposed to existing works that treat channel estimation and localization independently, this paper exploits the intrinsic coupling and nonlinear relationships between the channel parameters and user location for enhancement of both localization and channel reconstruction. By noticing the non-convex, nonlinear objective function and the sparser angle pattern, a variational Bayesian learning-based framework is developed to jointly estimate the channel parameters and user location through leveraging an effective approximation of the posterior distribution. The proposed framework is capable of unifying near-field and far-field scenarios owing to exploitation of sparsity of the angular domain. Since the joint channel and location estimation problem has a closed-form solution in each iteration, our proposed iterative algorithm performs better than the conventional particle swarm optimization (PSO) and maximum likelihood (ML) based ones in terms of computational complexity. 
Simulations demonstrate that the proposed algorithm almost reaches the Bayesian Cramer-Rao bound (BCRB) and achieves a superior estimation accuracy by comparing to the PSO and the ML algorithms. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2403.01093v1-abstract-full').style.display = 'none'; document.getElementById('2403.01093v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 1 March, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> March 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2401.15619">arXiv:2401.15619</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2401.15619">pdf</a>, <a href="https://arxiv.org/ps/2401.15619">ps</a>, <a href="https://arxiv.org/format/2401.15619">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Signal Processing">eess.SP</span> </div> </div> <p class="title is-5 mathjax"> A semidefinite programming approach for robust elliptic localization </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&amp;query=Xiong%2C+W">Wenxin Xiong</a>, <a href="/search/eess?searchtype=author&amp;query=Chen%2C+Y">Yuming Chen</a>, <a href="/search/eess?searchtype=author&amp;query=He%2C+J">Jiajun He</a>, <a href="/search/eess?searchtype=author&amp;query=Shi%2C+Z">Zhang-Lei Shi</a>, <a href="/search/eess?searchtype=author&amp;query=Hu%2C+K">Keyuan Hu</a>, <a href="/search/eess?searchtype=author&amp;query=So%2C+H+C">Hing Cheung So</a>, <a href="/search/eess?searchtype=author&amp;query=Leung%2C+C">Chi-Sing Leung</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span 
class="abstract-short has-text-grey-dark mathjax" id="2401.15619v2-abstract-short" style="display: inline;"> This short communication addresses the problem of elliptic localization with outlier measurements. Outliers are prevalent in various location-enabled applications, and can significantly compromise the positioning performance if not adequately handled. Instead of following the common trend of using $M$-estimation or adjusting the conventional least squares formulation by integrating extra error var&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2401.15619v2-abstract-full').style.display = 'inline'; document.getElementById('2401.15619v2-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2401.15619v2-abstract-full" style="display: none;"> This short communication addresses the problem of elliptic localization with outlier measurements. Outliers are prevalent in various location-enabled applications, and can significantly compromise the positioning performance if not adequately handled. Instead of following the common trend of using $M$-estimation or adjusting the conventional least squares formulation by integrating extra error variables, we take a different path. Specifically, we explore the worst-case robust approximation criterion to bolster resistance of the elliptic location estimator against outliers. From a geometric standpoint, our method boils down to pinpointing the Chebyshev center of a feasible set, which is defined by the available bistatic ranges with bounded measurement errors. For a practical approach to the associated min-max problem, we convert it into the convex optimization framework of semidefinite programming (SDP). Numerical simulations confirm that our SDP-based technique can outperform a number of existing elliptic localization schemes in terms of positioning accuracy in Gaussian mixture noise. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2401.15619v2-abstract-full').style.display = 'none'; document.getElementById('2401.15619v2-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 31 August, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 28 January, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> January 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2401.15564">arXiv:2401.15564</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2401.15564">pdf</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Systems and Control">eess.SY</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> </div> </div> <p class="title is-5 mathjax"> Design of UAV flight state recognition and trajectory prediction system based on trajectory feature construction </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&amp;query=Zhou%2C+X">Xingyu Zhou</a>, <a href="/search/eess?searchtype=author&amp;query=Shi%2C+Z">Zhuoyong Shi</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2401.15564v1-abstract-short" style="display: inline;"> With the impact of artificial intelligence on the traditional UAV industry, autonomous UAV flight has become a current hot research field. Based on the demand for research on critical technologies for autonomous flying UAVs, this paper addresses the field of flight state recognition and trajectory prediction of UAVs. 
This paper proposes a method to improve the accuracy of UAV trajectory prediction&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2401.15564v1-abstract-full').style.display = 'inline'; document.getElementById('2401.15564v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2401.15564v1-abstract-full" style="display: none;"> With the impact of artificial intelligence on the traditional UAV industry, autonomous UAV flight has become a current hot research field. Based on the demand for research on critical technologies for autonomous flying UAVs, this paper addresses the field of flight state recognition and trajectory prediction of UAVs. This paper proposes a method to improve the accuracy of UAV trajectory prediction based on UAV flight state recognition and verifies it using two prediction models. Firstly, UAV flight data acquisition and data preprocessing are carried out; secondly, UAV flight trajectory features are extracted based on data fusion and a UAV flight state recognition model based on PCA-DAGSVM model is established; finally, two UAV flight trajectory prediction models are established and the trajectory prediction errors of the two prediction models are compared and analyzed after flight state recognition. The results show that: 1) the UAV flight state recognition model based on PCA-DAGSVM has good recognition effect. 2) compared with the traditional UAV trajectory prediction model, the prediction model based on flight state recognition can effectively reduce the prediction error. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2401.15564v1-abstract-full').style.display = 'none'; document.getElementById('2401.15564v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 27 January, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> January 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2401.11960">arXiv:2401.11960</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2401.11960">pdf</a>, <a href="https://arxiv.org/format/2401.11960">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Image and Video Processing">eess.IV</span> </div> </div> <p class="title is-5 mathjax"> Observation-Guided Meteorological Field Downscaling at Station Scale: A Benchmark and a New Method </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&amp;query=Liu%2C+Z">Zili Liu</a>, <a href="/search/eess?searchtype=author&amp;query=Chen%2C+H">Hao Chen</a>, <a href="/search/eess?searchtype=author&amp;query=Bai%2C+L">Lei Bai</a>, <a href="/search/eess?searchtype=author&amp;query=Li%2C+W">Wenyuan Li</a>, <a href="/search/eess?searchtype=author&amp;query=Chen%2C+K">Keyan Chen</a>, <a href="/search/eess?searchtype=author&amp;query=Wang%2C+Z">Zhengyi Wang</a>, <a href="/search/eess?searchtype=author&amp;query=Ouyang%2C+W">Wanli Ouyang</a>, <a href="/search/eess?searchtype=author&amp;query=Zou%2C+Z">Zhengxia Zou</a>, <a href="/search/eess?searchtype=author&amp;query=Shi%2C+Z">Zhenwei Shi</a> </p> <p class="abstract mathjax"> <span 
class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2401.11960v1-abstract-short" style="display: inline;"> Downscaling (DS) of meteorological variables involves obtaining high-resolution states from low-resolution meteorological fields and is an important task in weather forecasting. Previous methods based on deep learning treat downscaling as a super-resolution task in computer vision and utilize high-resolution gridded meteorological fields as supervision to improve resolution at specific grid scales&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2401.11960v1-abstract-full').style.display = 'inline'; document.getElementById('2401.11960v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2401.11960v1-abstract-full" style="display: none;"> Downscaling (DS) of meteorological variables involves obtaining high-resolution states from low-resolution meteorological fields and is an important task in weather forecasting. Previous methods based on deep learning treat downscaling as a super-resolution task in computer vision and utilize high-resolution gridded meteorological fields as supervision to improve resolution at specific grid scales. However, this approach has struggled to align with the continuous distribution characteristics of meteorological fields, leading to an inherent systematic bias between the downscaled results and the actual observations at meteorological stations. In this paper, we extend meteorological downscaling to arbitrary scattered station scales, establish a brand new benchmark and dataset, and retrieve meteorological states at any given station location from a coarse-resolution meteorological field. Inspired by data assimilation techniques, we integrate observational data into the downscaling process, providing multi-scale observational priors. 
Building on this foundation, we propose a new downscaling model based on hypernetwork architecture, namely HyperDS, which efficiently integrates different observational information into the model training, achieving continuous scale modeling of the meteorological field. Through extensive experiments, our proposed method outperforms other specially designed baseline models on multiple surface variables. Notably, the mean squared error (MSE) for wind speed and surface pressure improved by 67% and 19.5% compared to other methods. We will release the dataset and code subsequently. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2401.11960v1-abstract-full').style.display = 'none'; document.getElementById('2401.11960v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 22 January, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> January 2024. 
</p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2312.15575">arXiv:2312.15575</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2312.15575">pdf</a>, <a href="https://arxiv.org/format/2312.15575">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Image and Video Processing">eess.IV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> Neural Born Series Operator for Biomedical Ultrasound Computed Tomography </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&amp;query=Zeng%2C+Z">Zhijun Zeng</a>, <a href="/search/eess?searchtype=author&amp;query=Zheng%2C+Y">Yihang Zheng</a>, <a href="/search/eess?searchtype=author&amp;query=Zheng%2C+Y">Youjia Zheng</a>, <a href="/search/eess?searchtype=author&amp;query=Li%2C+Y">Yubing Li</a>, <a href="/search/eess?searchtype=author&amp;query=Shi%2C+Z">Zuoqiang Shi</a>, <a href="/search/eess?searchtype=author&amp;query=Sun%2C+H">He Sun</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2312.15575v1-abstract-short" style="display: inline;"> Ultrasound Computed Tomography (USCT) provides a radiation-free option for high-resolution clinical imaging. Despite its potential, the computationally intensive Full Waveform Inversion (FWI) required for tissue property reconstruction limits its clinical utility. 
This paper introduces the Neural Born Series Operator (NBSO), a novel technique designed to speed up wave simulations, thereby facilita&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2312.15575v1-abstract-full').style.display = 'inline'; document.getElementById('2312.15575v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2312.15575v1-abstract-full" style="display: none;"> Ultrasound Computed Tomography (USCT) provides a radiation-free option for high-resolution clinical imaging. Despite its potential, the computationally intensive Full Waveform Inversion (FWI) required for tissue property reconstruction limits its clinical utility. This paper introduces the Neural Born Series Operator (NBSO), a novel technique designed to speed up wave simulations, thereby facilitating a more efficient USCT image reconstruction process through an NBSO-based FWI pipeline. Thoroughly validated on comprehensive brain and breast datasets, simulated under experimental USCT conditions, the NBSO proves to be accurate and efficient in both forward simulation and image reconstruction. This advancement demonstrates the potential of neural operators in facilitating near real-time USCT reconstruction, making the clinical application of USCT increasingly viable and promising. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2312.15575v1-abstract-full').style.display = 'none'; document.getElementById('2312.15575v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 24 December, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> December 2023. 
</p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">ACM Class:</span> I.4.5; J.3 </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2312.04377">arXiv:2312.04377</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2312.04377">pdf</a>, <a href="https://arxiv.org/format/2312.04377">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Information Theory">cs.IT</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Signal Processing">eess.SP</span> </div> </div> <p class="title is-5 mathjax"> HARQ-IR Aided Short Packet Communications: BLER Analysis and Throughput Maximization </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&amp;query=He%2C+F">Fuchao He</a>, <a href="/search/eess?searchtype=author&amp;query=Shi%2C+Z">Zheng Shi</a>, <a href="/search/eess?searchtype=author&amp;query=Yang%2C+G">Guanghua Yang</a>, <a href="/search/eess?searchtype=author&amp;query=Li%2C+X">Xiaofan Li</a>, <a href="/search/eess?searchtype=author&amp;query=Ye%2C+X">Xinrong Ye</a>, <a href="/search/eess?searchtype=author&amp;query=Ma%2C+S">Shaodan Ma</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2312.04377v2-abstract-short" style="display: inline;"> This paper introduces hybrid automatic repeat request with incremental redundancy (HARQ-IR) to boost the reliability of short packet communications. The finite blocklength information theory and correlated decoding events tremendously preclude the analysis of average block error rate (BLER). 
Fortunately, the recursive form of average BLER motivates us to calculate its value through the trapezoidal&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2312.04377v2-abstract-full').style.display = 'inline'; document.getElementById('2312.04377v2-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2312.04377v2-abstract-full" style="display: none;"> This paper introduces hybrid automatic repeat request with incremental redundancy (HARQ-IR) to boost the reliability of short packet communications. The finite blocklength information theory and correlated decoding events tremendously preclude the analysis of average block error rate (BLER). Fortunately, the recursive form of average BLER motivates us to calculate its value through the trapezoidal approximation and Gauss-Laguerre quadrature. Moreover, the asymptotic analysis is performed to derive a simple expression for the average BLER at high signal-to-noise ratio (SNR). Then, we study the maximization of long term average throughput (LTAT) via power allocation meanwhile ensuring the power and the BLER constraints. For tractability, the asymptotic BLER is employed to solve the problem through geometric programming (GP). However, the GP-based solution underestimates the LTAT at low SNR due to a large approximation error in this case. Alternatively, we also develop a deep reinforcement learning (DRL)-based framework to learn power allocation policy. In particular, the optimization problem is transformed into a constrained Markov decision process, which is solved by integrating deep deterministic policy gradient (DDPG) with subgradient method. The numerical results finally demonstrate that the DRL-based method outperforms the GP-based one at low SNR, albeit at the cost of increasing computational burden. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2312.04377v2-abstract-full').style.display = 'none'; document.getElementById('2312.04377v2-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 9 January, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 7 December, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> December 2023. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">13 pages, 10 figures</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2311.02389">arXiv:2311.02389</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2311.02389">pdf</a>, <a href="https://arxiv.org/format/2311.02389">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Systems and Control">eess.SY</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computer Science and Game Theory">cs.GT</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Robotics">cs.RO</span> </div> </div> <p class="title is-5 mathjax"> Multiplayer Homicidal Chauffeur Reach-Avoid Games: A Pursuit Enclosure Function Approach </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&amp;query=Yan%2C+R">Rui Yan</a>, <a href="/search/eess?searchtype=author&amp;query=Duan%2C+X">Xiaoming Duan</a>, <a href="/search/eess?searchtype=author&amp;query=Zou%2C+R">Rui Zou</a>, <a href="/search/eess?searchtype=author&amp;query=He%2C+X">Xin He</a>, <a href="/search/eess?searchtype=author&amp;query=Shi%2C+Z">Zongying Shi</a>, <a 
href="/search/eess?searchtype=author&amp;query=Bullo%2C+F">Francesco Bullo</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2311.02389v2-abstract-short" style="display: inline;"> This paper presents a multiplayer Homicidal Chauffeur reach-avoid differential game, which involves Dubins-car pursuers and simple-motion evaders. The goal of the pursuers is to cooperatively protect a planar convex region from the evaders, who strive to reach the region. We propose a cooperative strategy for the pursuers based on subgames for multiple pursuers against one evader and optimal task&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2311.02389v2-abstract-full').style.display = 'inline'; document.getElementById('2311.02389v2-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2311.02389v2-abstract-full" style="display: none;"> This paper presents a multiplayer Homicidal Chauffeur reach-avoid differential game, which involves Dubins-car pursuers and simple-motion evaders. The goal of the pursuers is to cooperatively protect a planar convex region from the evaders, who strive to reach the region. We propose a cooperative strategy for the pursuers based on subgames for multiple pursuers against one evader and optimal task allocation. We introduce pursuit enclosure functions (PEFs) and propose a new enclosure region pursuit (ERP) winning approach that supports forward analysis for the strategy synthesis in the subgames. We show that if a pursuit coalition is able to defend the region against an evader under the ERP winning, then no more than two pursuers in the coalition are necessarily needed. We also propose a steer-to-ERP approach to certify the ERP winning and synthesize the ERP winning strategy. 
To implement the strategy, we introduce a positional PEF and provide the necessary parameters, states, and strategies that ensure the ERP winning for both one pursuer and two pursuers against one evader. Additionally, we formulate a binary integer program using the subgame outcomes to maximize the captured evaders in the ERP winning for the pursuit task allocation. Finally, we propose a multiplayer receding-horizon strategy where the ERP winnings are checked in each horizon, the task is allocated, and the strategies of the pursuers are determined. Numerical examples are provided to illustrate the results. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2311.02389v2-abstract-full').style.display = 'none'; document.getElementById('2311.02389v2-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 22 December, 2023; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 4 November, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> November 2023. 
</p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">17 pages, 5 figures</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2310.15548">arXiv:2310.15548</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2310.15548">pdf</a>, <a href="https://arxiv.org/ps/2310.15548">ps</a>, <a href="https://arxiv.org/format/2310.15548">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Signal Processing">eess.SP</span> </div> </div> <p class="title is-5 mathjax"> Knowledge-driven Meta-learning for CSI Feedback </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&amp;query=Xiao%2C+H">Han Xiao</a>, <a href="/search/eess?searchtype=author&amp;query=Tian%2C+W">Wenqiang Tian</a>, <a href="/search/eess?searchtype=author&amp;query=Liu%2C+W">Wendong Liu</a>, <a href="/search/eess?searchtype=author&amp;query=Guo%2C+J">Jiajia Guo</a>, <a href="/search/eess?searchtype=author&amp;query=Zhang%2C+Z">Zhi Zhang</a>, <a href="/search/eess?searchtype=author&amp;query=Jin%2C+S">Shi Jin</a>, <a href="/search/eess?searchtype=author&amp;query=Shi%2C+Z">Zhihua Shi</a>, <a href="/search/eess?searchtype=author&amp;query=Guo%2C+L">Li Guo</a>, <a href="/search/eess?searchtype=author&amp;query=Shen%2C+J">Jia Shen</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2310.15548v2-abstract-short" style="display: inline;"> Accurate and effective channel state information (CSI) feedback is a key technology for massive multiple-input and multiple-output systems. 
Recently, deep learning (DL) has been introduced for CSI feedback enhancement through massive collected training data and lengthy training time, which is quite costly and impractical for realistic deployment. In this article, a knowledge-driven meta-learning a&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2310.15548v2-abstract-full').style.display = 'inline'; document.getElementById('2310.15548v2-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2310.15548v2-abstract-full" style="display: none;"> Accurate and effective channel state information (CSI) feedback is a key technology for massive multiple-input and multiple-output systems. Recently, deep learning (DL) has been introduced for CSI feedback enhancement through massive collected training data and lengthy training time, which is quite costly and impractical for realistic deployment. In this article, a knowledge-driven meta-learning approach is proposed, where the DL model initialized by the meta model obtained from meta training phase is able to achieve rapid convergence when facing a new scenario during target retraining phase. Specifically, instead of training with massive data collected from various scenarios, the meta task environment is constructed based on the intrinsic knowledge of spatial-frequency characteristics of CSI for meta training. Moreover, the target task dataset is also augmented by exploiting the knowledge of statistical characteristics of wireless channel, so that the DL model can achieve higher performance with small actually collected dataset and short training time. In addition, we provide analyses of rationale for the improvement yielded by the knowledge in both phases. Simulation results demonstrate the superiority of the proposed approach from the perspective of feedback performance and convergence speed. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2310.15548v2-abstract-full').style.display = 'none'; document.getElementById('2310.15548v2-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 25 October, 2023; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 24 October, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2023. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">arXiv admin note: text overlap with arXiv:2301.13475</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2310.10964">arXiv:2310.10964</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2310.10964">pdf</a>, <a href="https://arxiv.org/format/2310.10964">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Information Theory">cs.IT</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Signal Processing">eess.SP</span> </div> </div> <p class="title is-5 mathjax"> Spectral-Efficiency and Energy-Efficiency of Variable-Length XP-HARQ </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&amp;query=Feng%2C+J">Jiahui Feng</a>, <a href="/search/eess?searchtype=author&amp;query=Shi%2C+Z">Zheng Shi</a>, <a href="/search/eess?searchtype=author&amp;query=Fu%2C+Y">Yaru Fu</a>, <a href="/search/eess?searchtype=author&amp;query=Wang%2C+H">Hong Wang</a>, <a href="/search/eess?searchtype=author&amp;query=Yang%2C+G">Guanghua Yang</a>, <a href="/search/eess?searchtype=author&amp;query=Ma%2C+S">Shaodan Ma</a> </p> <p class="abstract mathjax"> <span 
class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2310.10964v1-abstract-short" style="display: inline;"> A variable-length cross-packet hybrid automatic repeat request (VL-XP-HARQ) is proposed to boost the spectral efficiency (SE) and the energy efficiency (EE) of communications. The SE is firstly derived in terms of the outage probabilities, with which the SE is proved to be upper bounded by the ergodic capacity (EC). Moreover, to facilitate the maximization of the SE, the asymptotic outage probabil&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2310.10964v1-abstract-full').style.display = 'inline'; document.getElementById('2310.10964v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2310.10964v1-abstract-full" style="display: none;"> A variable-length cross-packet hybrid automatic repeat request (VL-XP-HARQ) is proposed to boost the spectral efficiency (SE) and the energy efficiency (EE) of communications. The SE is firstly derived in terms of the outage probabilities, with which the SE is proved to be upper bounded by the ergodic capacity (EC). Moreover, to facilitate the maximization of the SE, the asymptotic outage probability is obtained at high signal-to-noise ratio (SNR), with which the SE is maximized by properly choosing the number of new information bits while guaranteeing outage requirement. By applying Dinkelbach&#39;s transform, the fractional objective function is transformed into a subtraction form, which can be decomposed into multiple sub-problems through alternating optimization. By noticing that the asymptotic outage probability is a convex function, each sub-problem can be easily relaxed to a convex problem by adopting successive convex approximation (SCA). Besides, the EE of VL-XP-HARQ is also investigated. 
An upper bound of the EE is found and proved to be attainable. Furthermore, by aiming at maximizing the EE via power allocation while confining outage within a certain constraint, the methods to the maximization of SE are invoked to solve the similar fractional problem. Finally, numerical results are presented for verification. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2310.10964v1-abstract-full').style.display = 'none'; document.getElementById('2310.10964v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 16 October, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2023. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2310.06259">arXiv:2310.06259</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2310.06259">pdf</a>, <a href="https://arxiv.org/format/2310.06259">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Image and Video Processing">eess.IV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Sound">cs.SD</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Audio and Speech Processing">eess.AS</span> </div> <div class="is-inline-block" style="margin-left: 0.5rem"> <div class="tags has-addons"> <span class="tag is-dark is-size-7">doi</span> <span class="tag is-light is-size-7"><a class="" href="https://doi.org/10.1109/TMM.2024.3521746">10.1109/TMM.2024.3521746 <i class="fa fa-external-link" aria-hidden="true"></i></a></span> </div> </div> </div> <p class="title is-5 mathjax"> Cross-modal Cognitive Consensus guided Audio-Visual Segmentation </p> <p class="authors"> <span class="search-hit">Authors:</span> <a 
href="/search/eess?searchtype=author&amp;query=Shi%2C+Z">Zhaofeng Shi</a>, <a href="/search/eess?searchtype=author&amp;query=Wu%2C+Q">Qingbo Wu</a>, <a href="/search/eess?searchtype=author&amp;query=Meng%2C+F">Fanman Meng</a>, <a href="/search/eess?searchtype=author&amp;query=Xu%2C+L">Linfeng Xu</a>, <a href="/search/eess?searchtype=author&amp;query=Li%2C+H">Hongliang Li</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2310.06259v5-abstract-short" style="display: inline;"> Audio-Visual Segmentation (AVS) aims to extract the sounding object from a video frame, which is represented by a pixel-wise segmentation mask for application scenarios such as multi-modal video editing, augmented reality, and intelligent robot systems. The pioneering work conducts this task through dense feature-level audio-visual interaction, which ignores the dimension gap between different mod&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2310.06259v5-abstract-full').style.display = 'inline'; document.getElementById('2310.06259v5-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2310.06259v5-abstract-full" style="display: none;"> Audio-Visual Segmentation (AVS) aims to extract the sounding object from a video frame, which is represented by a pixel-wise segmentation mask for application scenarios such as multi-modal video editing, augmented reality, and intelligent robot systems. The pioneering work conducts this task through dense feature-level audio-visual interaction, which ignores the dimension gap between different modalities. 
More specifically, the audio clip could only provide a Global semantic label in each sequence, but the video frame covers multiple semantic objects across different Local regions, which leads to mislocalization of the representationally similar but semantically different object. In this paper, we propose a Cross-modal Cognitive Consensus guided Network (C3N) to align the audio-visual semantics from the global dimension and progressively inject them into the local regions via an attention mechanism. Firstly, a Cross-modal Cognitive Consensus Inference Module (C3IM) is developed to extract a unified-modal label by integrating audio/visual classification confidence and similarities of modality-agnostic label embeddings. Then, we feed the unified-modal label back to the visual backbone as the explicit semantic-level guidance via a Cognitive Consensus guided Attention Module (CCAM), which highlights the local features corresponding to the interested object. Extensive experiments on the Single Sound Source Segmentation (S4) setting and Multiple Sound Source Segmentation (MS3) setting of the AVSBench dataset demonstrate the effectiveness of the proposed method, which achieves state-of-the-art performance. Code is available at https://github.com/ZhaofengSHI/AVS-C3N. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2310.06259v5-abstract-full').style.display = 'none'; document.getElementById('2310.06259v5-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 17 July, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 9 October, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2023. 
</p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Accepted by IEEE Transactions on Multimedia; 16 pages</span> </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">MSC Class:</span> 68U10 <span class="has-text-black-bis has-text-weight-semibold">ACM Class:</span> I.4.6 </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2309.16372">arXiv:2309.16372</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2309.16372">pdf</a>, <a href="https://arxiv.org/format/2309.16372">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Image and Video Processing">eess.IV</span> </div> </div> <p class="title is-5 mathjax"> Aperture Diffraction for Compact Snapshot Spectral Imaging </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&amp;query=Lv%2C+T">Tao Lv</a>, <a href="/search/eess?searchtype=author&amp;query=Ye%2C+H">Hao Ye</a>, <a href="/search/eess?searchtype=author&amp;query=Yuan%2C+Q">Quan Yuan</a>, <a href="/search/eess?searchtype=author&amp;query=Shi%2C+Z">Zhan Shi</a>, <a href="/search/eess?searchtype=author&amp;query=Wang%2C+Y">Yibo Wang</a>, <a href="/search/eess?searchtype=author&amp;query=Wang%2C+S">Shuming Wang</a>, <a href="/search/eess?searchtype=author&amp;query=Cao%2C+X">Xun Cao</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2309.16372v1-abstract-short" style="display: inline;"> We demonstrate a compact, cost-effective snapshot spectral imaging system named Aperture 
Diffraction Imaging Spectrometer (ADIS), which consists only of an imaging lens with an ultra-thin orthogonal aperture mask and a mosaic filter sensor, requiring no additional physical footprint compared to common RGB cameras. Then we introduce a new optical design that each point in the object space is multip&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2309.16372v1-abstract-full').style.display = 'inline'; document.getElementById('2309.16372v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2309.16372v1-abstract-full" style="display: none;"> We demonstrate a compact, cost-effective snapshot spectral imaging system named Aperture Diffraction Imaging Spectrometer (ADIS), which consists only of an imaging lens with an ultra-thin orthogonal aperture mask and a mosaic filter sensor, requiring no additional physical footprint compared to common RGB cameras. Then we introduce a new optical design that each point in the object space is multiplexed to discrete encoding locations on the mosaic filter sensor by diffraction-based spatial-spectral projection engineering generated from the orthogonal mask. The orthogonal projection is uniformly accepted to obtain a weakly calibration-dependent data form to enhance modulation robustness. Meanwhile, the Cascade Shift-Shuffle Spectral Transformer (CSST) with strong perception of the diffraction degeneration is designed to solve a sparsity-constrained inverse problem, realizing the volume reconstruction from 2D measurements with Large amount of aliasing. Our system is evaluated by elaborating the imaging optical theory and reconstruction algorithm with demonstrating the experimental imaging under a single exposure. Ultimately, we achieve the sub-super-pixel spatial resolution and high spectral resolution imaging. The code will be available at: https://github.com/Krito-ex/CSST. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2309.16372v1-abstract-full').style.display = 'none'; document.getElementById('2309.16372v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 27 September, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> September 2023. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">accepted by International Conference on Computer Vision (ICCV) 2023</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2309.07141">arXiv:2309.07141</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2309.07141">pdf</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Signal Processing">eess.SP</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> Design of Recognition and Evaluation System for Table Tennis Players&#39; Motor Skills Based on Artificial Intelligence </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&amp;query=Shi%2C+Z">Zhuo-yong Shi</a>, <a href="/search/eess?searchtype=author&amp;query=Jia%2C+Y">Ye-tao Jia</a>, <a href="/search/eess?searchtype=author&amp;query=Zhang%2C+K">Ke-xin Zhang</a>, <a href="/search/eess?searchtype=author&amp;query=Wang%2C+D">Ding-han Wang</a>, <a href="/search/eess?searchtype=author&amp;query=Ji%2C+L">Long-meng Ji</a>, <a href="/search/eess?searchtype=author&amp;query=Wu%2C+Y">Yong Wu</a> </p> <p 
class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2309.07141v1-abstract-short" style="display: inline;"> With the rapid development of electronic science and technology, the research on wearable devices is constantly updated, but for now, it is not comprehensive for wearable devices to recognize and analyze the movement of specific sports. Based on this, this paper improves wearable devices of table tennis sport, and realizes the pattern recognition and evaluation of table tennis players&#39; motor skill&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2309.07141v1-abstract-full').style.display = 'inline'; document.getElementById('2309.07141v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2309.07141v1-abstract-full" style="display: none;"> With the rapid development of electronic science and technology, the research on wearable devices is constantly updated, but for now, it is not comprehensive for wearable devices to recognize and analyze the movement of specific sports. Based on this, this paper improves wearable devices of table tennis sport, and realizes the pattern recognition and evaluation of table tennis players&#39; motor skills through artificial intelligence. Firstly, a device is designed to collect the movement information of table tennis players and the actual movement data is processed. Secondly, a sliding window is made to divide the collected motion data into a characteristic database of six table tennis benchmark movements. Thirdly, motion features were constructed based on feature engineering, and motor skills were identified for different models after dimensionality reduction. Finally, the hierarchical evaluation system of motor skills is established with the loss functions of different evaluation indexes. 
The results show that in the recognition of table tennis players&#39; motor skills, the feature-based BP neural network proposed in this paper has higher recognition accuracy and stronger generalization ability than the traditional convolutional neural network. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2309.07141v1-abstract-full').style.display = 'none'; document.getElementById('2309.07141v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 4 September, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> September 2023. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">34 pages, 16 figures</span> </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">MSC Class:</span> 93-01 <span class="has-text-black-bis has-text-weight-semibold">ACM Class:</span> G.1; H.4 </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2308.04304">arXiv:2308.04304</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2308.04304">pdf</a>, <a href="https://arxiv.org/format/2308.04304">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Information Theory">cs.IT</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Cryptography and Security">cs.CR</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Image and Video Processing">eess.IV</span> </div> </div> <p class="title is-5 mathjax"> The Model Inversion Eavesdropping Attack in Semantic Communication 
Systems </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&amp;query=Chen%2C+Y">Yuhao Chen</a>, <a href="/search/eess?searchtype=author&amp;query=Yang%2C+Q">Qianqian Yang</a>, <a href="/search/eess?searchtype=author&amp;query=Shi%2C+Z">Zhiguo Shi</a>, <a href="/search/eess?searchtype=author&amp;query=Chen%2C+J">Jiming Chen</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2308.04304v1-abstract-short" style="display: inline;"> In recent years, semantic communication has been a popular research topic for its superiority in communication efficiency. As semantic communication relies on deep learning to extract meaning from raw messages, it is vulnerable to attacks targeting deep learning models. In this paper, we introduce the model inversion eavesdropping attack (MIEA) to reveal the risk of privacy leaks in the semantic c&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2308.04304v1-abstract-full').style.display = 'inline'; document.getElementById('2308.04304v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2308.04304v1-abstract-full" style="display: none;"> In recent years, semantic communication has been a popular research topic for its superiority in communication efficiency. As semantic communication relies on deep learning to extract meaning from raw messages, it is vulnerable to attacks targeting deep learning models. In this paper, we introduce the model inversion eavesdropping attack (MIEA) to reveal the risk of privacy leaks in the semantic communication system. 
In MIEA, the attacker first eavesdrops the signal being transmitted by the semantic communication system and then performs model inversion attack to reconstruct the raw message, where both the white-box and black-box settings are considered. Evaluation results show that MIEA can successfully reconstruct the raw message with good quality under different channel conditions. We then propose a defense method based on random permutation and substitution to defend against MIEA in order to achieve secure semantic communication. Our experimental results demonstrate the effectiveness of the proposed defense method in preventing MIEA. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2308.04304v1-abstract-full').style.display = 'none'; document.getElementById('2308.04304v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 8 August, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> August 2023. 
</p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Accepted by 2023 IEEE Global Communications Conference (GLOBECOM)</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2308.02140">arXiv:2308.02140</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2308.02140">pdf</a>, <a href="https://arxiv.org/ps/2308.02140">ps</a>, <a href="https://arxiv.org/format/2308.02140">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Information Theory">cs.IT</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Signal Processing">eess.SP</span> </div> </div> <p class="title is-5 mathjax"> Deep Reinforcement Learning Empowered Rate Selection of XP-HARQ </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&amp;query=Wu%2C+D">Da Wu</a>, <a href="/search/eess?searchtype=author&amp;query=Feng%2C+J">Jiahui Feng</a>, <a href="/search/eess?searchtype=author&amp;query=Shi%2C+Z">Zheng Shi</a>, <a href="/search/eess?searchtype=author&amp;query=Lei%2C+H">Hongjiang Lei</a>, <a href="/search/eess?searchtype=author&amp;query=Yang%2C+G">Guanghua Yang</a>, <a href="/search/eess?searchtype=author&amp;query=Ma%2C+S">Shaodan Ma</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2308.02140v1-abstract-short" style="display: inline;"> The complex transmission mechanism of cross-packet hybrid automatic repeat request (XP-HARQ) hinders its optimal system design. To overcome this difficulty, this letter attempts to use the deep reinforcement learning (DRL) to solve the rate selection problem of XP-HARQ over correlated fading channels. 
In particular, the long term average throughput (LTAT) is maximized by properly choosing the incr&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2308.02140v1-abstract-full').style.display = 'inline'; document.getElementById('2308.02140v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2308.02140v1-abstract-full" style="display: none;"> The complex transmission mechanism of cross-packet hybrid automatic repeat request (XP-HARQ) hinders its optimal system design. To overcome this difficulty, this letter attempts to use the deep reinforcement learning (DRL) to solve the rate selection problem of XP-HARQ over correlated fading channels. In particular, the long term average throughput (LTAT) is maximized by properly choosing the incremental information rate for each HARQ round on the basis of the outdated channel state information (CSI) available at the transmitter. The rate selection problem is first converted into a Markov decision process (MDP), which is then solved by capitalizing on the algorithm of deep deterministic policy gradient (DDPG) with prioritized experience replay. The simulation results finally corroborate the superiority of the proposed XP-HARQ scheme over the conventional HARQ with incremental redundancy (HARQ-IR) and the XP-HARQ with only statistical CSI. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2308.02140v1-abstract-full').style.display = 'none'; document.getElementById('2308.02140v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 4 August, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> August 2023. 
</p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2308.02131">arXiv:2308.02131</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2308.02131">pdf</a>, <a href="https://arxiv.org/format/2308.02131">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Information Theory">cs.IT</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Signal Processing">eess.SP</span> </div> </div> <p class="title is-5 mathjax"> Graph Convolutional Network Enabled Power-Constrained HARQ Strategy for URLLC </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&amp;query=Chen%2C+Y">Yi Chen</a>, <a href="/search/eess?searchtype=author&amp;query=Shi%2C+Z">Zheng Shi</a>, <a href="/search/eess?searchtype=author&amp;query=Wang%2C+H">Hong Wang</a>, <a href="/search/eess?searchtype=author&amp;query=Fu%2C+Y">Yaru Fu</a>, <a href="/search/eess?searchtype=author&amp;query=Yang%2C+G">Guanghua Yang</a>, <a href="/search/eess?searchtype=author&amp;query=Ma%2C+S">Shaodan Ma</a>, <a href="/search/eess?searchtype=author&amp;query=Ding%2C+H">Haichuan Ding</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2308.02131v1-abstract-short" style="display: inline;"> In this paper, a power-constrained hybrid automatic repeat request (HARQ) transmission strategy is developed to support ultra-reliable low-latency communications (URLLC). In particular, we aim to minimize the delivery latency of HARQ schemes over time-correlated fading channels, meanwhile ensuring the high reliability and limited power consumption. 
To ease the optimization, the simple asymptotic o&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2308.02131v1-abstract-full').style.display = 'inline'; document.getElementById('2308.02131v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2308.02131v1-abstract-full" style="display: none;"> In this paper, a power-constrained hybrid automatic repeat request (HARQ) transmission strategy is developed to support ultra-reliable low-latency communications (URLLC). In particular, we aim to minimize the delivery latency of HARQ schemes over time-correlated fading channels, meanwhile ensuring the high reliability and limited power consumption. To ease the optimization, the simple asymptotic outage expressions of HARQ schemes are adopted. Furthermore, by noticing the non-convexity of the latency minimization problem and the intricate connection between different HARQ rounds, the graph convolutional network (GCN) is invoked for the optimal power solution owing to its powerful ability of handling the graph data. The primal-dual learning method is then leveraged to train the GCN weights. Consequently, the numerical results are presented for verification together with the comparisons among three HARQ schemes in terms of the latency and the reliability, where the three HARQ schemes include Type-I HARQ, HARQ with chase combining (HARQ-CC), and HARQ with incremental redundancy (HARQ-IR). To recapitulate, it is revealed that HARQ-IR offers the lowest latency while guaranteeing the demanded reliability target under a stringent power constraint, albeit at the price of high coding complexity. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2308.02131v1-abstract-full').style.display = 'none'; document.getElementById('2308.02131v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 4 August, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> August 2023. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2307.13220">arXiv:2307.13220</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2307.13220">pdf</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Image and Video Processing">eess.IV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Medical Physics">physics.med-ph</span> </div> </div> <p class="title is-5 mathjax"> One for Multiple: Physics-informed Synthetic Data Boosts Generalizable Deep Learning for Fast MRI Reconstruction </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&amp;query=Wang%2C+Z">Zi Wang</a>, <a href="/search/eess?searchtype=author&amp;query=Yu%2C+X">Xiaotong Yu</a>, <a href="/search/eess?searchtype=author&amp;query=Wang%2C+C">Chengyan Wang</a>, <a href="/search/eess?searchtype=author&amp;query=Chen%2C+W">Weibo Chen</a>, <a href="/search/eess?searchtype=author&amp;query=Wang%2C+J">Jiazheng Wang</a>, <a href="/search/eess?searchtype=author&amp;query=Chu%2C+Y">Ying-Hua Chu</a>, <a href="/search/eess?searchtype=author&amp;query=Sun%2C+H">Hongwei Sun</a>, <a href="/search/eess?searchtype=author&amp;query=Li%2C+R">Rushuai Li</a>, <a href="/search/eess?searchtype=author&amp;query=Li%2C+P">Peiyong Li</a>, <a 
href="/search/eess?searchtype=author&amp;query=Yang%2C+F">Fan Yang</a>, <a href="/search/eess?searchtype=author&amp;query=Han%2C+H">Haiwei Han</a>, <a href="/search/eess?searchtype=author&amp;query=Kang%2C+T">Taishan Kang</a>, <a href="/search/eess?searchtype=author&amp;query=Lin%2C+J">Jianzhong Lin</a>, <a href="/search/eess?searchtype=author&amp;query=Yang%2C+C">Chen Yang</a>, <a href="/search/eess?searchtype=author&amp;query=Chang%2C+S">Shufu Chang</a>, <a href="/search/eess?searchtype=author&amp;query=Shi%2C+Z">Zhang Shi</a>, <a href="/search/eess?searchtype=author&amp;query=Hua%2C+S">Sha Hua</a>, <a href="/search/eess?searchtype=author&amp;query=Li%2C+Y">Yan Li</a>, <a href="/search/eess?searchtype=author&amp;query=Hu%2C+J">Juan Hu</a>, <a href="/search/eess?searchtype=author&amp;query=Zhu%2C+L">Liuhong Zhu</a>, <a href="/search/eess?searchtype=author&amp;query=Zhou%2C+J">Jianjun Zhou</a>, <a href="/search/eess?searchtype=author&amp;query=Lin%2C+M">Meijing Lin</a>, <a href="/search/eess?searchtype=author&amp;query=Guo%2C+J">Jiefeng Guo</a>, <a href="/search/eess?searchtype=author&amp;query=Cai%2C+C">Congbo Cai</a>, <a href="/search/eess?searchtype=author&amp;query=Chen%2C+Z">Zhong Chen</a> , et al. (3 additional authors not shown) </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2307.13220v2-abstract-short" style="display: inline;"> Magnetic resonance imaging (MRI) is a widely used radiological modality renowned for its radiation-free, comprehensive insights into the human body, facilitating medical diagnoses. However, the drawback of prolonged scan times hinders its accessibility. The k-space undersampling offers a solution, yet the resultant artifacts necessitate meticulous removal during image reconstruction. 
Although Deep&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2307.13220v2-abstract-full').style.display = 'inline'; document.getElementById('2307.13220v2-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2307.13220v2-abstract-full" style="display: none;"> Magnetic resonance imaging (MRI) is a widely used radiological modality renowned for its radiation-free, comprehensive insights into the human body, facilitating medical diagnoses. However, the drawback of prolonged scan times hinders its accessibility. The k-space undersampling offers a solution, yet the resultant artifacts necessitate meticulous removal during image reconstruction. Although Deep Learning (DL) has proven effective for fast MRI image reconstruction, its broader applicability across various imaging scenarios has been constrained. Challenges include the high cost and privacy restrictions associated with acquiring large-scale, diverse training data, coupled with the inherent difficulty of addressing mismatches between training and target data in existing DL methodologies. Here, we present a novel Physics-Informed Synthetic data learning framework for Fast MRI, called PISF. PISF marks a breakthrough by enabling generalized DL for multi-scenario MRI reconstruction through a single trained model. Our approach separates the reconstruction of a 2D image into many 1D basic problems, commencing with 1D data synthesis to facilitate generalization. We demonstrate that training DL models on synthetic data, coupled with enhanced learning techniques, yields in vivo MRI reconstructions comparable to or surpassing those of models trained on matched realistic datasets, reducing the reliance on real-world MRI data by up to 96%. Additionally, PISF exhibits remarkable generalizability across multiple vendors and imaging centers. 
Its adaptability to diverse patient populations has been validated through evaluations by ten experienced medical professionals. PISF presents a feasible and cost-effective way to significantly boost the widespread adoption of DL in various fast MRI applications. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2307.13220v2-abstract-full').style.display = 'none'; document.getElementById('2307.13220v2-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 28 February, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 24 July, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> July 2023. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">38 pages, 19 figures, 5 tables</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2306.13296">arXiv:2306.13296</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2306.13296">pdf</a>, <a href="https://arxiv.org/format/2306.13296">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Signal Processing">eess.SP</span> </div> </div> <p class="title is-5 mathjax"> Semantic-aware Transmission for Robust Point Cloud Classification </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&amp;query=Han%2C+T">Tianxiao Han</a>, <a href="/search/eess?searchtype=author&amp;query=Chi%2C+K">Kaiyi Chi</a>, <a href="/search/eess?searchtype=author&amp;query=Yang%2C+Q">Qianqian Yang</a>, <a href="/search/eess?searchtype=author&amp;query=Shi%2C+Z">Zhiguo Shi</a> </p> <p class="abstract mathjax"> <span 
class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2306.13296v1-abstract-short" style="display: inline;"> As three-dimensional (3D) data acquisition devices become increasingly prevalent, the demand for 3D point cloud transmission is growing. In this study, we introduce a semantic-aware communication system for robust point cloud classification that capitalizes on the advantages of pre-trained Point-BERT models. Our proposed method comprises four main components: the semantic encoder, channel encoder,&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2306.13296v1-abstract-full').style.display = 'inline'; document.getElementById('2306.13296v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2306.13296v1-abstract-full" style="display: none;"> As three-dimensional (3D) data acquisition devices become increasingly prevalent, the demand for 3D point cloud transmission is growing. In this study, we introduce a semantic-aware communication system for robust point cloud classification that capitalizes on the advantages of pre-trained Point-BERT models. Our proposed method comprises four main components: the semantic encoder, channel encoder, channel decoder, and semantic decoder. By employing a two-stage training strategy, our system facilitates efficient and adaptable learning tailored to the specific classification tasks. The results show that the proposed system achieves classification accuracy of over 89\% when SNR is higher than 10 dB and still maintains accuracy above 66.6\% even at SNR of 4 dB. Compared to the existing method, our approach performs at 0.8\% to 48\% better across different SNR values, demonstrating robustness to channel noise. 
Our system also achieves a balance between accuracy and speed, being computationally efficient while maintaining high classification performance under noisy channel conditions. This adaptable and resilient approach holds considerable promise for a wide array of 3D scene understanding applications, effectively addressing the challenges posed by channel noise. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2306.13296v1-abstract-full').style.display = 'none'; document.getElementById('2306.13296v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 23 June, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> June 2023. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">submitted to globecom 2023</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2305.03546">arXiv:2305.03546</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2305.03546">pdf</a>, <a href="https://arxiv.org/format/2305.03546">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Image and Video Processing">eess.IV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> Breast Cancer Immunohistochemical Image Generation: a Benchmark Dataset and Challenge Review </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&amp;query=Zhu%2C+C">Chuang Zhu</a>, <a href="/search/eess?searchtype=author&amp;query=Liu%2C+S">Shengjie Liu</a>, <a 
href="/search/eess?searchtype=author&amp;query=Yu%2C+Z">Zekuan Yu</a>, <a href="/search/eess?searchtype=author&amp;query=Xu%2C+F">Feng Xu</a>, <a href="/search/eess?searchtype=author&amp;query=Aggarwal%2C+A">Arpit Aggarwal</a>, <a href="/search/eess?searchtype=author&amp;query=Corredor%2C+G">Germán Corredor</a>, <a href="/search/eess?searchtype=author&amp;query=Madabhushi%2C+A">Anant Madabhushi</a>, <a href="/search/eess?searchtype=author&amp;query=Qu%2C+Q">Qixun Qu</a>, <a href="/search/eess?searchtype=author&amp;query=Fan%2C+H">Hongwei Fan</a>, <a href="/search/eess?searchtype=author&amp;query=Li%2C+F">Fangda Li</a>, <a href="/search/eess?searchtype=author&amp;query=Li%2C+Y">Yueheng Li</a>, <a href="/search/eess?searchtype=author&amp;query=Guan%2C+X">Xianchao Guan</a>, <a href="/search/eess?searchtype=author&amp;query=Zhang%2C+Y">Yongbing Zhang</a>, <a href="/search/eess?searchtype=author&amp;query=Singh%2C+V+K">Vivek Kumar Singh</a>, <a href="/search/eess?searchtype=author&amp;query=Akram%2C+F">Farhan Akram</a>, <a href="/search/eess?searchtype=author&amp;query=Sarker%2C+M+M+K">Md. Mostafa Kamal Sarker</a>, <a href="/search/eess?searchtype=author&amp;query=Shi%2C+Z">Zhongyue Shi</a>, <a href="/search/eess?searchtype=author&amp;query=Jin%2C+M">Mulan Jin</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2305.03546v2-abstract-short" style="display: inline;"> For invasive breast cancer, immunohistochemical (IHC) techniques are often used to detect the expression level of human epidermal growth factor receptor-2 (HER2) in breast tissue to formulate a precise treatment plan. 
From the perspective of saving manpower, material and time costs, directly generating IHC-stained images from Hematoxylin and Eosin (H&amp;E) stained images is a valuable research direct&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2305.03546v2-abstract-full').style.display = 'inline'; document.getElementById('2305.03546v2-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2305.03546v2-abstract-full" style="display: none;"> For invasive breast cancer, immunohistochemical (IHC) techniques are often used to detect the expression level of human epidermal growth factor receptor-2 (HER2) in breast tissue to formulate a precise treatment plan. From the perspective of saving manpower, material and time costs, directly generating IHC-stained images from Hematoxylin and Eosin (H&amp;E) stained images is a valuable research direction. Therefore, we held the breast cancer immunohistochemical image generation challenge, aiming to explore novel ideas of deep learning technology in pathological image generation and promote research in this field. The challenge provided registered H&amp;E and IHC-stained image pairs, and participants were required to use these images to train a model that can directly generate IHC-stained images from corresponding H&amp;E-stained images. We selected and reviewed the five highest-ranking methods based on their PSNR and SSIM metrics, while also providing overviews of the corresponding pipelines and implementations. In this paper, we further analyze the current limitations in the field of breast cancer immunohistochemical image generation and forecast the future development of this field. We hope that the released dataset and the challenge will inspire more scholars to jointly study higher-quality IHC-stained image generation. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2305.03546v2-abstract-full').style.display = 'none'; document.getElementById('2305.03546v2-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 22 September, 2023; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 5 May, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> May 2023. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">12 pages, 12 figures, 2 tables</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2305.01871">arXiv:2305.01871</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2305.01871">pdf</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Medical Physics">physics.med-ph</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Image and Video Processing">eess.IV</span> </div> </div> <p class="title is-5 mathjax"> Convolutional neural network-based single-shot speckle tracking for x-ray phase-contrast imaging </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&amp;query=Shi%2C+S+Q+Z">Serena Qinyun Z. Shi</a>, <a href="/search/eess?searchtype=author&amp;query=Shapira%2C+N">Nadav Shapira</a>, <a href="/search/eess?searchtype=author&amp;query=No%C3%ABl%2C+P+B">Peter B. 
Noël</a>, <a href="/search/eess?searchtype=author&amp;query=Meyer%2C+S">Sebastian Meyer</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2305.01871v1-abstract-short" style="display: inline;"> X-ray phase-contrast imaging offers enhanced sensitivity for weakly-attenuating materials, such as breast and brain tissue, but has yet to be widely implemented clinically due to high coherence requirements and expensive x-ray optics. Speckle-based phase contrast imaging has been proposed as an affordable and simple alternative; however, obtaining high-quality phase-contrast images requires accura&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2305.01871v1-abstract-full').style.display = 'inline'; document.getElementById('2305.01871v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2305.01871v1-abstract-full" style="display: none;"> X-ray phase-contrast imaging offers enhanced sensitivity for weakly-attenuating materials, such as breast and brain tissue, but has yet to be widely implemented clinically due to high coherence requirements and expensive x-ray optics. Speckle-based phase contrast imaging has been proposed as an affordable and simple alternative; however, obtaining high-quality phase-contrast images requires accurate tracking of sample-induced speckle pattern modulations. This study introduced a convolutional neural network to accurately retrieve sub-pixel displacement fields from pairs of reference (i.e., without sample) and sample images for speckle tracking. Speckle patterns were generated utilizing an in-house wave-optical simulation tool. These images were then randomly deformed and attenuated to generate training and testing datasets. 
The performance of the model was evaluated and compared against conventional speckle tracking algorithms: zero-normalized cross-correlation and unified modulated pattern analysis. We demonstrate improved accuracy (1.7 times better than conventional speckle tracking), bias (2.6 times), and spatial resolution (2.3 times), as well as noise robustness, window size independence, and computational efficiency. In addition, the model was validated with a simulated geometric phantom. Thus, in this study, we propose a novel convolutional-neural-network-based speckle-tracking method with enhanced performance and robustness that offers improved alternative tracking while further expanding the potential applications of speckle-based phase contrast imaging. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2305.01871v1-abstract-full').style.display = 'none'; document.getElementById('2305.01871v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 2 May, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> May 2023. 
</p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2304.12184">arXiv:2304.12184</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2304.12184">pdf</a>, <a href="https://arxiv.org/format/2304.12184">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Signal Processing">eess.SP</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Information Theory">cs.IT</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> Active RIS-aided EH-NOMA Networks: A Deep Reinforcement Learning Approach </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&amp;query=Shi%2C+Z">Zhaoyuan Shi</a>, <a href="/search/eess?searchtype=author&amp;query=Lu%2C+H">Huabing Lu</a>, <a href="/search/eess?searchtype=author&amp;query=Xie%2C+X">Xianzhong Xie</a>, <a href="/search/eess?searchtype=author&amp;query=Yang%2C+H">Helin Yang</a>, <a href="/search/eess?searchtype=author&amp;query=Huang%2C+C">Chongwen Huang</a>, <a href="/search/eess?searchtype=author&amp;query=Cai%2C+J">Jun Cai</a>, <a href="/search/eess?searchtype=author&amp;query=Ding%2C+Z">Zhiguo Ding</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2304.12184v1-abstract-short" style="display: inline;"> An active reconfigurable intelligent surface (RIS)-aided multi-user downlink communication system is investigated, where non-orthogonal multiple access (NOMA) is employed to improve spectral efficiency, and the active RIS is powered by energy harvesting (EH). 
The problem of joint control of the RIS&#39;s amplification matrix and phase shift matrix is formulated to maximize the communication success ra&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2304.12184v1-abstract-full').style.display = 'inline'; document.getElementById('2304.12184v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2304.12184v1-abstract-full" style="display: none;"> An active reconfigurable intelligent surface (RIS)-aided multi-user downlink communication system is investigated, where non-orthogonal multiple access (NOMA) is employed to improve spectral efficiency, and the active RIS is powered by energy harvesting (EH). The problem of joint control of the RIS&#39;s amplification matrix and phase shift matrix is formulated to maximize the communication success ratio with considering the quality of service (QoS) requirements of users, dynamic communication state, and dynamic available energy of RIS. To tackle this non-convex problem, a cascaded deep learning algorithm namely long short-term memory-deep deterministic policy gradient (LSTM-DDPG) is designed. First, an advanced LSTM based algorithm is developed to predict users&#39; dynamic communication state. Then, based on the prediction results, a DDPG based algorithm is proposed to joint control the amplification matrix and phase shift matrix of the RIS. Finally, simulation results verify the accuracy of the prediction of the proposed LSTM algorithm, and demonstrate that the LSTM-DDPG algorithm has a significant advantage over other benchmark algorithms in terms of communication success ratio performance. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2304.12184v1-abstract-full').style.display = 'none'; document.getElementById('2304.12184v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 11 April, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> April 2023. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2304.11341">arXiv:2304.11341</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2304.11341">pdf</a>, <a href="https://arxiv.org/ps/2304.11341">ps</a>, <a href="https://arxiv.org/format/2304.11341">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Information Theory">cs.IT</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Signal Processing">eess.SP</span> </div> </div> <p class="title is-5 mathjax"> Performance Analysis and Optimal Design of HARQ-IR-Aided Terahertz Communications </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&amp;query=Song%2C+Z">Ziyang Song</a>, <a href="/search/eess?searchtype=author&amp;query=Shi%2C+Z">Zheng Shi</a>, <a href="/search/eess?searchtype=author&amp;query=Su%2C+J">Jiaji Su</a>, <a href="/search/eess?searchtype=author&amp;query=Dou%2C+Q">Qingping Dou</a>, <a href="/search/eess?searchtype=author&amp;query=Yang%2C+G">Guanghua Yang</a>, <a href="/search/eess?searchtype=author&amp;query=Ding%2C+H">Haichuan Ding</a>, <a href="/search/eess?searchtype=author&amp;query=Ma%2C+S">Shaodan Ma</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2304.11341v1-abstract-short" 
style="display: inline;"> Terahertz (THz) communications are envisioned to be a promising technology for 6G thanks to its broad bandwidth. However, the large path loss, antenna misalignment, and atmospheric influence of THz communications severely deteriorate its reliability. To address this, hybrid automatic repeat request (HARQ) is recognized as an effective technique to ensure reliable THz communications. This paper del&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2304.11341v1-abstract-full').style.display = 'inline'; document.getElementById('2304.11341v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2304.11341v1-abstract-full" style="display: none;"> Terahertz (THz) communications are envisioned to be a promising technology for 6G thanks to its broad bandwidth. However, the large path loss, antenna misalignment, and atmospheric influence of THz communications severely deteriorate its reliability. To address this, hybrid automatic repeat request (HARQ) is recognized as an effective technique to ensure reliable THz communications. This paper delves into the performance analysis of HARQ with incremental redundancy (HARQ-IR)-aided THz communications in the presence/absence of blockage. More specifically, the analytical expression of the outage probability of HARQ-IR-aided THz communications is derived, with which the asymptotic outage analysis is enabled to gain meaningful insights, including diversity order, power allocation gain, modulation and coding gain, etc. Then the long term average throughput (LTAT) is expressed in terms of the outage probability based on renewal theory. Moreover, to combat the blockage effects, a multi-hop HARQ-IR-aided THz communication scheme is proposed and its performance is examined. 
To demonstrate the superiority of the proposed scheme, the other two HARQ-aided schemes, i.e., Type-I HARQ and HARQ with chase combining (HARQ-CC), are used for benchmarking in the simulations. In addition, a deep neural network (DNN) based outage evaluation framework with low computational complexity is devised to reap the benefits of using both asymptotic and simulation results in low and high outage regimes, respectively. This novel outage evaluation framework is finally employed for the optimal rate selection, which outperforms the asymptotic based optimization. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2304.11341v1-abstract-full').style.display = 'none'; document.getElementById('2304.11341v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 22 April, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> April 2023. 
</p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Blockage, hybrid automatic repeat request (HARQ), outage probability, terahertz (THz) communications</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2303.14095">arXiv:2303.14095</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2303.14095">pdf</a>, <a href="https://arxiv.org/format/2303.14095">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Robotics">cs.RO</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Image and Video Processing">eess.IV</span> </div> </div> <p class="title is-5 mathjax"> PanoVPR: Towards Unified Perspective-to-Equirectangular Visual Place Recognition via Sliding Windows across the Panoramic View </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&amp;query=Shi%2C+Z">Ze Shi</a>, <a href="/search/eess?searchtype=author&amp;query=Shi%2C+H">Hao Shi</a>, <a href="/search/eess?searchtype=author&amp;query=Yang%2C+K">Kailun Yang</a>, <a href="/search/eess?searchtype=author&amp;query=Yin%2C+Z">Zhe Yin</a>, <a href="/search/eess?searchtype=author&amp;query=Lin%2C+Y">Yining Lin</a>, <a href="/search/eess?searchtype=author&amp;query=Wang%2C+K">Kaiwei Wang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2303.14095v2-abstract-short" style="display: inline;"> Visual place recognition has gained significant attention in recent years as a crucial technology in autonomous driving and robotics. 
Currently, the two main approaches are the perspective view retrieval (P2P) paradigm and the equirectangular image retrieval (E2E) paradigm. However, it is practical and natural to assume that users only have consumer-grade pinhole cameras to obtain query perspectiv&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2303.14095v2-abstract-full').style.display = 'inline'; document.getElementById('2303.14095v2-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2303.14095v2-abstract-full" style="display: none;"> Visual place recognition has gained significant attention in recent years as a crucial technology in autonomous driving and robotics. Currently, the two main approaches are the perspective view retrieval (P2P) paradigm and the equirectangular image retrieval (E2E) paradigm. However, it is practical and natural to assume that users only have consumer-grade pinhole cameras to obtain query perspective images and retrieve them in panoramic database images from map providers. To address this, we propose \textit{PanoVPR}, a perspective-to-equirectangular (P2E) visual place recognition framework that employs sliding windows to eliminate feature truncation caused by hard cropping. Specifically, PanoVPR slides windows over the entire equirectangular image and computes feature descriptors for each window, which are then compared to determine place similarity. Notably, our unified framework enables direct transfer of the backbone from P2P methods without any modification, supporting not only CNNs but also Transformers. To facilitate training and evaluation, we derive the Pitts250k-P2E dataset from the Pitts250k and establish YQ360, latter is the first P2E visual place recognition dataset collected by a mobile robot platform aiming to simulate real-world task scenarios better. 
Extensive experiments demonstrate that PanoVPR achieves state-of-the-art performance and obtains 3.8% and 8.0% performance gain on Pitts250k-P2E and YQ360 compared to the previous best method, respectively. Code and datasets will be publicly available at https://github.com/zafirshi/PanoVPR. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2303.14095v2-abstract-full').style.display = 'none'; document.getElementById('2303.14095v2-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 28 July, 2023; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 24 March, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> March 2023. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Accepted to ITSC 2023. 
Code and datasets will be made available at https://github.com/zafirshi/PanoVPR</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2302.12662">arXiv:2302.12662</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2302.12662">pdf</a>, <a href="https://arxiv.org/format/2302.12662">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Image and Video Processing">eess.IV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> FedDBL: Communication and Data Efficient Federated Deep-Broad Learning for Histopathological Tissue Classification </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&amp;query=Deng%2C+T">Tianpeng Deng</a>, <a href="/search/eess?searchtype=author&amp;query=Huang%2C+Y">Yanqi Huang</a>, <a href="/search/eess?searchtype=author&amp;query=Han%2C+G">Guoqiang Han</a>, <a href="/search/eess?searchtype=author&amp;query=Shi%2C+Z">Zhenwei Shi</a>, <a href="/search/eess?searchtype=author&amp;query=Lin%2C+J">Jiatai Lin</a>, <a href="/search/eess?searchtype=author&amp;query=Dou%2C+Q">Qi Dou</a>, <a href="/search/eess?searchtype=author&amp;query=Liu%2C+Z">Zaiyi Liu</a>, <a href="/search/eess?searchtype=author&amp;query=Guo%2C+X">Xiao-jing Guo</a>, <a href="/search/eess?searchtype=author&amp;query=Chen%2C+C+L+P">C. L. Philip Chen</a>, <a href="/search/eess?searchtype=author&amp;query=Han%2C+C">Chu Han</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2302.12662v2-abstract-short" style="display: inline;"> Histopathological tissue classification is a fundamental task in computational pathology. 
Deep learning-based models have achieved superior performance but centralized training with data centralization suffers from the privacy leakage problem. Federated learning (FL) can safeguard privacy by keeping training samples locally, but existing FL-based frameworks require a large number of well-annotated&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2302.12662v2-abstract-full').style.display = 'inline'; document.getElementById('2302.12662v2-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2302.12662v2-abstract-full" style="display: none;"> Histopathological tissue classification is a fundamental task in computational pathology. Deep learning-based models have achieved superior performance but centralized training with data centralization suffers from the privacy leakage problem. Federated learning (FL) can safeguard privacy by keeping training samples locally, but existing FL-based frameworks require a large number of well-annotated training samples and numerous rounds of communication which hinder their practicability in the real-world clinical scenario. In this paper, we propose a universal and lightweight federated learning framework, named Federated Deep-Broad Learning (FedDBL), to achieve superior classification performance with limited training samples and only one-round communication. By simply associating a pre-trained deep learning feature extractor, a fast and lightweight broad learning inference system and a classical federated aggregation approach, FedDBL can dramatically reduce data dependency and improve communication efficiency. Five-fold cross-validation demonstrates that FedDBL greatly outperforms the competitors with only one-round communication and limited training samples, while it even achieves comparable performance with the ones under multiple-round communications. 
Furthermore, due to the lightweight design and one-round communication, FedDBL reduces the communication burden from 4.6GB to only 276.5KB per client using the ResNet-50 backbone at 50-round training. Since no data or deep model sharing across different clients, the privacy issue is well-solved and the model security is guaranteed with no model inversion attack risk. Code is available at https://github.com/tianpeng-deng/FedDBL. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2302.12662v2-abstract-full').style.display = 'none'; document.getElementById('2302.12662v2-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 17 December, 2023; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 24 February, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2023. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2302.12004">arXiv:2302.12004</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2302.12004">pdf</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Signal Processing">eess.SP</span> </div> </div> <p class="title is-5 mathjax"> Knowledge Distillation-based Information Sharing for Online Process Monitoring in Decentralized Manufacturing System </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&amp;query=Shi%2C+Z">Zhangyue Shi</a>, <a href="/search/eess?searchtype=author&amp;query=Li%2C+Y">Yuxuan Li</a>, <a href="/search/eess?searchtype=author&amp;query=Liu%2C+C">Chenang Liu</a> </p> <p class="abstract mathjax"> <span 
class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2302.12004v2-abstract-short" style="display: inline;"> In advanced manufacturing, the incorporation of sensing technology provides an opportunity to achieve efficient in-situ process monitoring using machine learning methods. Meanwhile, the advances of information technologies also enable a connected and decentralized environment for manufacturing systems, making different manufacturing units in the system collaborate more closely. In a decentralized&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2302.12004v2-abstract-full').style.display = 'inline'; document.getElementById('2302.12004v2-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2302.12004v2-abstract-full" style="display: none;"> In advanced manufacturing, the incorporation of sensing technology provides an opportunity to achieve efficient in-situ process monitoring using machine learning methods. Meanwhile, the advances of information technologies also enable a connected and decentralized environment for manufacturing systems, making different manufacturing units in the system collaborate more closely. In a decentralized manufacturing system, the involved units may fabricate same or similar products and deploy their own machine learning model for online process monitoring. However, due to the possible inconsistency of task progress during the operation, it is also common that some units have more informative data while some have less informative data. Thus, the monitoring performance of machine learning model for each unit may highly vary. Therefore, it is extremely valuable to achieve efficient and secured knowledge sharing among the units in a decentralized manufacturing system for enhancement of poorly performed models. 
To realize this goal, this paper proposes a novel knowledge distillation-based information sharing (KD-IS) framework, which could distill informative knowledge from well performed models to improve the monitoring performance of poorly performed models. To validate the effectiveness of this method, a real-world case study is conducted in a connected fused filament fabrication (FFF)-based additive manufacturing (AM) platform. The experimental results show that the developed method is very efficient in improving model monitoring performance at poorly performed models, with solid protection on potential data privacy. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2302.12004v2-abstract-full').style.display = 'none'; document.getElementById('2302.12004v2-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 25 July, 2023; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 8 February, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2023. 
</p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2302.02608">arXiv:2302.02608</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2302.02608">pdf</a>, <a href="https://arxiv.org/ps/2302.02608">ps</a>, <a href="https://arxiv.org/format/2302.02608">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Information Theory">cs.IT</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Signal Processing">eess.SP</span> </div> </div> <p class="title is-5 mathjax"> Cooperative Task-Oriented Communication for Multi-Modal Data with Transmission Control </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&amp;query=Wang%2C+S">Shiqi Wang</a>, <a href="/search/eess?searchtype=author&amp;query=Yang%2C+Q">Qianqian Yang</a>, <a href="/search/eess?searchtype=author&amp;query=Shi%2C+Z">Zhiguo Shi</a>, <a href="/search/eess?searchtype=author&amp;query=Yang%2C+Z">Zhaohui Yang</a>, <a href="/search/eess?searchtype=author&amp;query=Zhang%2C+Z">Zhaoyang Zhang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2302.02608v1-abstract-short" style="display: inline;"> Real-time intelligence applications in Internet of Things (IoT) environment depend on timely data communication. However, it is challenging to transmit and analyse massive data of various modalities. Recently proposed task-oriented communication methods based on deep learning have showed its superiority in communication efficiency. 
In this paper, we propose a cooperative task-oriented communicatio&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2302.02608v1-abstract-full').style.display = 'inline'; document.getElementById('2302.02608v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2302.02608v1-abstract-full" style="display: none;"> Real-time intelligence applications in Internet of Things (IoT) environment depend on timely data communication. However, it is challenging to transmit and analyse massive data of various modalities. Recently proposed task-oriented communication methods based on deep learning have showed its superiority in communication efficiency. In this paper, we propose a cooperative task-oriented communication method for the transmission of multi-modal data from multiple end devices to a central server. In particular, we use the transmission result of data of one modality, which is with lower rate, to control the transmission of other modalities with higher rate in order to reduce the amount of transmitted date. We take the human activity recognition (HAR) task in a smart home environment and design the semantic-oriented transceivers for the transmission of monitoring videos of different rooms and acceleration data of the monitored human. The numerical results demonstrate that by using the transmission control based on the obtained results of the received acceleration data, the transmission is reduced to 2% of that without transmission control while preserving the performance on the HAR task. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2302.02608v1-abstract-full').style.display = 'none'; document.getElementById('2302.02608v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 6 February, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2023. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2301.13475">arXiv:2301.13475</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2301.13475">pdf</a>, <a href="https://arxiv.org/ps/2301.13475">ps</a>, <a href="https://arxiv.org/format/2301.13475">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Signal Processing">eess.SP</span> </div> </div> <p class="title is-5 mathjax"> A Knowledge-Driven Meta-Learning Method for CSI Feedback </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&amp;query=Xiao%2C+H">Han Xiao</a>, <a href="/search/eess?searchtype=author&amp;query=Tian%2C+W">Wenqiang Tian</a>, <a href="/search/eess?searchtype=author&amp;query=Liu%2C+W">Wendong Liu</a>, <a href="/search/eess?searchtype=author&amp;query=Zhang%2C+Z">Zhi Zhang</a>, <a href="/search/eess?searchtype=author&amp;query=Shi%2C+Z">Zhihua Shi</a>, <a href="/search/eess?searchtype=author&amp;query=Guo%2C+L">Li Guo</a>, <a href="/search/eess?searchtype=author&amp;query=Shen%2C+J">Jia Shen</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2301.13475v1-abstract-short" style="display: inline;"> Accurate and effective channel state information (CSI) feedback is a key technology for massive multiple-input and 
multiple-output (MIMO) systems. Recently, deep learning (DL) has been introduced to enhance CSI feedback in massive MIMO application, where the massive collected training data and lengthy training time are costly and impractical for realistic deployment. In this paper, a knowledge-dri&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2301.13475v1-abstract-full').style.display = 'inline'; document.getElementById('2301.13475v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2301.13475v1-abstract-full" style="display: none;"> Accurate and effective channel state information (CSI) feedback is a key technology for massive multiple-input and multiple-output (MIMO) systems. Recently, deep learning (DL) has been introduced to enhance CSI feedback in massive MIMO application, where the massive collected training data and lengthy training time are costly and impractical for realistic deployment. In this paper, a knowledge-driven meta-learning solution for CSI feedback is proposed, where the DL model initialized by the meta model obtained from meta training phase is able to achieve rapid convergence when facing a new scenario during the target retraining phase. Specifically, instead of training with massive data collected from various scenarios, the meta task environment is constructed based on the intrinsic knowledge of spatial-frequency characteristics of CSI for meta training. Moreover, the target task dataset is also augmented by exploiting the knowledge of statistical characteristics of channel, so that the DL model initialized by meta training can rapidly fit into a new target scenario with higher performance using only a few actually collected data in the target retraining phase. The method greatly reduces the demand for the number of actual collected data, as well as the cost of training time for realistic deployment. 
Simulation results demonstrate the superiority of the proposed approach from the perspective of feedback performance and convergence speed. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2301.13475v1-abstract-full').style.display = 'none'; document.getElementById('2301.13475v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 31 January, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> January 2023. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2211.10287">arXiv:2211.10287</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2211.10287">pdf</a>, <a href="https://arxiv.org/format/2211.10287">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Image and Video Processing">eess.IV</span> </div> </div> <p class="title is-5 mathjax"> Generative Model Based Highly Efficient Semantic Communication Approach for Image Transmission </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&amp;query=Han%2C+T">Tianxiao Han</a>, <a href="/search/eess?searchtype=author&amp;query=Tang%2C+J">Jiancheng Tang</a>, <a href="/search/eess?searchtype=author&amp;query=Yang%2C+Q">Qianqian Yang</a>, <a href="/search/eess?searchtype=author&amp;query=Duan%2C+Y">Yiping Duan</a>, <a href="/search/eess?searchtype=author&amp;query=Zhang%2C+Z">Zhaoyang Zhang</a>, <a href="/search/eess?searchtype=author&amp;query=Shi%2C+Z">Zhiguo Shi</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2211.10287v1-abstract-short" style="display: inline;"> Deep learning (DL) based semantic 
communication methods have been explored to transmit images efficiently in recent years. In this paper, we propose a generative model based semantic communication to further improve the efficiency of image transmission and protect private information. In particular, the transmitter extracts the interpretable latent representation from the original image by a gener&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2211.10287v1-abstract-full').style.display = 'inline'; document.getElementById('2211.10287v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2211.10287v1-abstract-full" style="display: none;"> Deep learning (DL) based semantic communication methods have been explored to transmit images efficiently in recent years. In this paper, we propose a generative model based semantic communication to further improve the efficiency of image transmission and protect private information. In particular, the transmitter extracts the interpretable latent representation from the original image by a generative model exploiting the GAN inversion method. We also employ a privacy filter and a knowledge base to erase private information and replace it with natural features in the knowledge base. The simulation results indicate that our proposed method achieves comparable quality of received images while significantly reducing communication costs compared to the existing methods. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2211.10287v1-abstract-full').style.display = 'none'; document.getElementById('2211.10287v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 18 November, 2022; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> November 2022. 
</p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">submitted to ICASSP 2023</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2211.00648">arXiv:2211.00648</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2211.00648">pdf</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Image and Video Processing">eess.IV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Optics">physics.optics</span> </div> <div class="is-inline-block" style="margin-left: 0.5rem"> <div class="tags has-addons"> <span class="tag is-dark is-size-7">doi</span> <span class="tag is-light is-size-7"><a class="" href="https://doi.org/10.1038/s41467-023-38898-4">10.1038/s41467-023-38898-4 <i class="fa fa-external-link" aria-hidden="true"></i></a></span> </div> </div> </div> <p class="title is-5 mathjax"> Non-line-of-sight imaging with arbitrary illumination and detection pattern </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&amp;query=Liu%2C+X">Xintong Liu</a>, <a href="/search/eess?searchtype=author&amp;query=Wang%2C+J">Jianyu Wang</a>, <a href="/search/eess?searchtype=author&amp;query=Xiao%2C+L">Leping Xiao</a>, <a href="/search/eess?searchtype=author&amp;query=Shi%2C+Z">Zuoqiang Shi</a>, <a href="/search/eess?searchtype=author&amp;query=Fu%2C+X">Xing Fu</a>, <a href="/search/eess?searchtype=author&amp;query=Qiu%2C+L">Lingyun Qiu</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2211.00648v1-abstract-short" style="display: inline;"> Non-line-of-sight (NLOS) imaging aims at reconstructing targets obscured from the direct line of 
sight. Existing NLOS imaging algorithms require dense measurements at rectangular grid points in a large area of the relay surface, which severely hinders their availability to variable relay scenarios in practical applications such as robotic vision, autonomous driving, rescue operations and remote se&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2211.00648v1-abstract-full').style.display = 'inline'; document.getElementById('2211.00648v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2211.00648v1-abstract-full" style="display: none;"> Non-line-of-sight (NLOS) imaging aims at reconstructing targets obscured from the direct line of sight. Existing NLOS imaging algorithms require dense measurements at rectangular grid points in a large area of the relay surface, which severely hinders their availability to variable relay scenarios in practical applications such as robotic vision, autonomous driving, rescue operations and remote sensing. In this work, we propose a Bayesian framework for NLOS imaging with no specific requirements on the spatial pattern of illumination and detection points. By introducing virtual confocal signals, we design a confocal complemented signal-object collaborative regularization (CC-SOCR) algorithm for high quality reconstructions. Our approach is capable of reconstructing both albedo and surface normal of the hidden objects with fine details under the most general relay setting. Moreover, with a regular relay surface, coarse rather than dense measurements are enough for our approach such that the acquisition time can be reduced significantly. As demonstrated in multiple experiments, the new framework substantially enhances the applicability of NLOS imaging. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2211.00648v1-abstract-full').style.display = 'none'; document.getElementById('2211.00648v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 1 November, 2022; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> November 2022. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">main article: 32 pages with 8 figures; supplementary information: 49 pages with 26 figures</span> </p> </li> </ol> <nav class="pagination is-small is-centered breathe-horizontal" role="navigation" aria-label="pagination"> <a href="" class="pagination-previous is-invisible">Previous </a> <a href="/search/?searchtype=author&amp;query=Shi%2C+Z&amp;start=50" class="pagination-next" >Next </a> <ul class="pagination-list"> <li> <a href="/search/?searchtype=author&amp;query=Shi%2C+Z&amp;start=0" class="pagination-link is-current" aria-label="Goto page 1">1 </a> </li> <li> <a href="/search/?searchtype=author&amp;query=Shi%2C+Z&amp;start=50" class="pagination-link " aria-label="Page 2" aria-current="page">2 </a> </li> <li> <a href="/search/?searchtype=author&amp;query=Shi%2C+Z&amp;start=100" class="pagination-link " aria-label="Page 3" aria-current="page">3 </a> </li> </ul> </nav> <div class="is-hidden-tablet"> <!-- feedback for mobile only --> <span class="help" style="display: inline-block;"><a href="https://github.com/arXiv/arxiv-search/releases">Search v0.5.6 released 2020-02-24</a>&nbsp;&nbsp;</span> </div> </div> </main> <footer> <div class="columns is-desktop" role="navigation" aria-label="Secondary"> <!-- MetaColumn 1 --> <div class="column"> <div class="columns"> <div class="column"> <ul class="nav-spaced"> <li><a 
href="https://info.arxiv.org/about">About</a></li> <li><a href="https://info.arxiv.org/help">Help</a></li> </ul> </div> <div class="column"> <ul class="nav-spaced"> <li> <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 512 512" class="icon filter-black" role="presentation"><title>contact arXiv</title><desc>Click here to contact arXiv</desc><path d="M502.3 190.8c3.9-3.1 9.7-.2 9.7 4.7V400c0 26.5-21.5 48-48 48H48c-26.5 0-48-21.5-48-48V195.6c0-5 5.7-7.8 9.7-4.7 22.4 17.4 52.1 39.5 154.1 113.6 21.1 15.4 56.7 47.8 92.2 47.6 35.7.3 72-32.8 92.3-47.6 102-74.1 131.6-96.3 154-113.7zM256 320c23.2.4 56.6-29.2 73.4-41.4 132.7-96.3 142.8-104.7 173.4-128.7 5.8-4.5 9.2-11.5 9.2-18.9v-19c0-26.5-21.5-48-48-48H48C21.5 64 0 85.5 0 112v19c0 7.4 3.4 14.3 9.2 18.9 30.6 23.9 40.7 32.4 173.4 128.7 16.8 12.2 50.2 41.8 73.4 41.4z"/></svg> <a href="https://info.arxiv.org/help/contact.html"> Contact</a> </li> <li> <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 512 512" class="icon filter-black" role="presentation"><title>subscribe to arXiv mailings</title><desc>Click here to subscribe</desc><path d="M476 3.2L12.5 270.6c-18.1 10.4-15.8 35.6 2.2 43.2L121 358.4l287.3-253.2c5.5-4.9 13.3 2.6 8.6 8.3L176 407v80.5c0 23.6 28.5 32.9 42.5 15.8L282 426l124.6 52.2c14.2 6 30.4-2.9 33-18.2l72-432C515 7.8 493.3-6.8 476 3.2z"/></svg> <a href="https://info.arxiv.org/help/subscribe"> Subscribe</a> </li> </ul> </div> </div> </div> <!-- end MetaColumn 1 --> <!-- MetaColumn 2 --> <div class="column"> <div class="columns"> <div class="column"> <ul class="nav-spaced"> <li><a href="https://info.arxiv.org/help/license/index.html">Copyright</a></li> <li><a href="https://info.arxiv.org/help/policies/privacy_policy.html">Privacy Policy</a></li> </ul> </div> <div class="column sorry-app-links"> <ul class="nav-spaced"> <li><a href="https://info.arxiv.org/help/web_accessibility.html">Web Accessibility Assistance</a></li> <li> <p class="help"> <a class="a11y-main-link" href="https://status.arxiv.org" 
target="_blank">arXiv Operational Status <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 256 512" class="icon filter-dark_grey" role="presentation"><path d="M224.3 273l-136 136c-9.4 9.4-24.6 9.4-33.9 0l-22.6-22.6c-9.4-9.4-9.4-24.6 0-33.9l96.4-96.4-96.4-96.4c-9.4-9.4-9.4-24.6 0-33.9L54.3 103c9.4-9.4 24.6-9.4 33.9 0l136 136c9.5 9.4 9.5 24.6.1 34z"/></svg></a><br> Get status notifications via <a class="is-link" href="https://subscribe.sorryapp.com/24846f03/email/new" target="_blank"><svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 512 512" class="icon filter-black" role="presentation"><path d="M502.3 190.8c3.9-3.1 9.7-.2 9.7 4.7V400c0 26.5-21.5 48-48 48H48c-26.5 0-48-21.5-48-48V195.6c0-5 5.7-7.8 9.7-4.7 22.4 17.4 52.1 39.5 154.1 113.6 21.1 15.4 56.7 47.8 92.2 47.6 35.7.3 72-32.8 92.3-47.6 102-74.1 131.6-96.3 154-113.7zM256 320c23.2.4 56.6-29.2 73.4-41.4 132.7-96.3 142.8-104.7 173.4-128.7 5.8-4.5 9.2-11.5 9.2-18.9v-19c0-26.5-21.5-48-48-48H48C21.5 64 0 85.5 0 112v19c0 7.4 3.4 14.3 9.2 18.9 30.6 23.9 40.7 32.4 173.4 128.7 16.8 12.2 50.2 41.8 73.4 41.4z"/></svg>email</a> or <a class="is-link" href="https://subscribe.sorryapp.com/24846f03/slack/new" target="_blank"><svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 448 512" class="icon filter-black" role="presentation"><path d="M94.12 315.1c0 25.9-21.16 47.06-47.06 47.06S0 341 0 315.1c0-25.9 21.16-47.06 47.06-47.06h47.06v47.06zm23.72 0c0-25.9 21.16-47.06 47.06-47.06s47.06 21.16 47.06 47.06v117.84c0 25.9-21.16 47.06-47.06 47.06s-47.06-21.16-47.06-47.06V315.1zm47.06-188.98c-25.9 0-47.06-21.16-47.06-47.06S139 32 164.9 32s47.06 21.16 47.06 47.06v47.06H164.9zm0 23.72c25.9 0 47.06 21.16 47.06 47.06s-21.16 47.06-47.06 47.06H47.06C21.16 243.96 0 222.8 0 196.9s21.16-47.06 47.06-47.06H164.9zm188.98 47.06c0-25.9 21.16-47.06 47.06-47.06 25.9 0 47.06 21.16 47.06 47.06s-21.16 47.06-47.06 47.06h-47.06V196.9zm-23.72 0c0 25.9-21.16 47.06-47.06 47.06-25.9 0-47.06-21.16-47.06-47.06V79.06c0-25.9 21.16-47.06 47.06-47.06 25.9 0 47.06 
21.16 47.06 47.06V196.9zM283.1 385.88c25.9 0 47.06 21.16 47.06 47.06 0 25.9-21.16 47.06-47.06 47.06-25.9 0-47.06-21.16-47.06-47.06v-47.06h47.06zm0-23.72c-25.9 0-47.06-21.16-47.06-47.06 0-25.9 21.16-47.06 47.06-47.06h117.84c25.9 0 47.06 21.16 47.06 47.06 0 25.9-21.16 47.06-47.06 47.06H283.1z"/></svg>slack</a> </p> </li> </ul> </div> </div> </div> <!-- end MetaColumn 2 --> </div> </footer> <script src="https://static.arxiv.org/static/base/1.0.0a5/js/member_acknowledgement.js"></script> </body> </html>

Pages: 1 2 3 4 5 6 7 8 9 10