Search | arXiv e-print repository

<!DOCTYPE html>
<html lang="en">
<head>
  <meta charset="utf-8"/>
  <meta name="viewport" content="width=device-width, initial-scale=1"/>

  <!-- Site icons and browser/OS theme metadata. -->
  <link rel="apple-touch-icon" sizes="180x180" href="https://static.arxiv.org/static/base/1.0.0a5/images/icons/apple-touch-icon.png">
  <link rel="icon" type="image/png" sizes="32x32" href="https://static.arxiv.org/static/base/1.0.0a5/images/icons/favicon-32x32.png">
  <link rel="icon" type="image/png" sizes="16x16" href="https://static.arxiv.org/static/base/1.0.0a5/images/icons/favicon-16x16.png">
  <link rel="manifest" href="https://static.arxiv.org/static/base/1.0.0a5/images/icons/site.webmanifest">
  <link rel="mask-icon" href="https://static.arxiv.org/static/base/1.0.0a5/images/icons/safari-pinned-tab.svg" color="#b31b1b">
  <link rel="shortcut icon" href="https://static.arxiv.org/static/base/1.0.0a5/images/icons/favicon.ico">
  <meta name="msapplication-TileColor" content="#b31b1b">
  <meta name="msapplication-config" content="images/icons/browserconfig.xml">
  <meta name="theme-color" content="#b31b1b">

  <title>Search | arXiv e-print repository</title>
  <script defer src="https://static.arxiv.org/static/base/1.0.0a5/fontawesome-free-5.11.2-web/js/all.js"></script>
  <link rel="stylesheet" href="https://static.arxiv.org/static/base/1.0.0a5/css/arxivstyle.css" />

  <!--
    MathJax 2 configuration. MathJax reads text/x-mathjax-config blocks when
    MathJax.js loads, so this block stays ahead of the loader script below.
  -->
  <script type="text/x-mathjax-config">
    MathJax.Hub.Config({
      messageStyle: "none",
      extensions: ["tex2jax.js"],
      jax: ["input/TeX", "output/HTML-CSS"],
      tex2jax: {
        /* Inline math: $...$ and \(...\), parallel to the $$...$$ and \[...\] display pair. */
        inlineMath: [ ['$','$'], ["\\(","\\)"] ],
        displayMath: [ ['$$','$$'], ["\\[","\\]"] ],
        /* With processEscapes on, a backslash-escaped dollar sign is a literal dollar, not a delimiter. */
        processEscapes: true,
        /* Typeset only inside elements whose class matches "mathjax.*"; skip everything else. */
        ignoreClass: '.*',
        processClass: 'mathjax.*'
      },
      TeX: {
        extensions: ["AMSmath.js", "AMSsymbols.js", "noErrors.js"],
        noErrors: {
          inlineDelimiters: ["$","$"],
          multiLine: false,
          style: { "font-size": "normal", "border": "" }
        }
      },
      "HTML-CSS": { availableFonts: ["TeX"] }
    });
  </script>
  <!-- Loaded over https like every other static asset on this page. -->
  <script src="https://static.arxiv.org/MathJax-2.7.3/MathJax.js"></script>
  <script
src="https://static.arxiv.org/static/base/1.0.0a5/js/notification.js"></script> <link rel="stylesheet" href="https://static.arxiv.org/static/search/0.5.6/css/bulma-tooltip.min.css" /> <link rel="stylesheet" href="https://static.arxiv.org/static/search/0.5.6/css/search.css" /> <script src="https://code.jquery.com/jquery-3.2.1.slim.min.js" integrity="sha256-k2WSCIexGzOj3Euiig+TlR8gA0EmPjuc79OEeY5L45g=" crossorigin="anonymous"></script> <script src="https://static.arxiv.org/static/search/0.5.6/js/fieldset.js"></script> <style> radio#cf-customfield_11400 { display: none; } </style> </head> <body> <header><a href="#main-container" class="is-sr-only">Skip to main content</a>  <div class="attribution level is-marginless" role="banner"> <div class="level-left"> <a class="level-item" href="https://cornell.edu/"><img src="https://static.arxiv.org/static/base/1.0.0a5/images/cornell-reduced-white-SMALL.svg" alt="Cornell University" width="200" aria-label="logo" /></a> </div> <div class="level-right is-marginless"><p class="sponsors level-item is-marginless"><span id="support-ack-url">We gratefully acknowledge support from<br /> the Simons Foundation, <a href="https://info.arxiv.org/about/ourmembers.html">member institutions</a>, and all contributors. <a href="https://info.arxiv.org/about/donate.html">Donate</a></span></p></div> </div>  <div class="identity level is-marginless"> <div class="level-left"> <div class="level-item"> <a class="arxiv" href="https://arxiv.org/" aria-label="arxiv-logo"> <img src="https://static.arxiv.org/static/base/1.0.0a5/images/arxiv-logo-one-color-white.svg" aria-label="logo" alt="arxiv logo" width="85" style="width:85px;"/> </a> </div> </div> <div class="search-block level-right"> <form class="level-item mini-search" method="GET" action="https://arxiv.org/search"> <div class="field has-addons"> <div class="control"> <input class="input is-small" type="text" name="query" placeholder="Search..." 
aria-label="Search term or terms" /> <p class="help"><a href="https://info.arxiv.org/help">Help</a> | <a href="https://arxiv.org/search/advanced">Advanced Search</a></p> </div> <div class="control"> <div class="select is-small"> <select name="searchtype" aria-label="Field to search"> <option value="all" selected="selected">All fields</option> <option value="title">Title</option> <option value="author">Author</option> <option value="abstract">Abstract</option> <option value="comments">Comments</option> <option value="journal_ref">Journal reference</option> <option value="acm_class">ACM classification</option> <option value="msc_class">MSC classification</option> <option value="report_num">Report number</option> <option value="paper_id">arXiv identifier</option> <option value="doi">DOI</option> <option value="orcid">ORCID</option> <option value="author_id">arXiv author ID</option> <option value="help">Help pages</option> <option value="full_text">Full text</option> </select> </div> </div> <input type="hidden" name="source" value="header"> <button class="button is-small is-cul-darker">Search</button> </div> </form> </div> </div>  <div class="container"> <div class="user-tools is-size-7 has-text-right has-text-weight-bold" role="navigation" aria-label="User menu"> <a href="https://arxiv.org/login">Login</a> </div> </div> </header> <main class="container" id="main-container"> <div class="level is-marginless"> <div class="level-left"> <h1 class="title is-clearfix"> Showing 1–50 of 155 results for author: <span class="mathjax">Xu, D</span> </h1> </div> <div class="level-right is-hidden-mobile">  <span class="help" style="display: inline-block;"><a href="https://github.com/arXiv/arxiv-search/releases">Search v0.5.6 released 2020-02-24</a>  </span> </div> </div> <div class="content"> <form method="GET" action="/search/eess" role="search"> Searching in archive <strong>eess</strong>. 
<a href="/search/?searchtype=author&query=Xu%2C+D">Search in all archives.</a> <div class="field has-addons-tablet"> <div class="control is-expanded"> <label for="query" class="hidden-label">Search term or terms</label> <input class="input is-medium" id="query" name="query" placeholder="Search term..." type="text" value="Xu, D"> </div> <div class="select control is-medium"> <label class="is-hidden" for="searchtype">Field</label> <select class="is-medium" id="searchtype" name="searchtype"><option value="all">All fields</option><option value="title">Title</option><option selected value="author">Author(s)</option><option value="abstract">Abstract</option><option value="comments">Comments</option><option value="journal_ref">Journal reference</option><option value="acm_class">ACM classification</option><option value="msc_class">MSC classification</option><option value="report_num">Report number</option><option value="paper_id">arXiv identifier</option><option value="doi">DOI</option><option value="orcid">ORCID</option><option value="license">License (URI)</option><option value="author_id">arXiv author ID</option><option value="help">Help pages</option><option value="full_text">Full text</option></select> </div> <div class="control"> <button class="button is-link is-medium">Search</button> </div> </div> <div class="field"> <div class="control is-size-7"> <label class="radio"> <input checked id="abstracts-0" name="abstracts" type="radio" value="show"> Show abstracts </label> <label class="radio"> <input id="abstracts-1" name="abstracts" type="radio" value="hide"> Hide abstracts </label> </div> </div> <div class="is-clearfix" style="height: 2.5em"> <div class="is-pulled-right"> <a href="/search/advanced?terms-0-term=Xu%2C+D&terms-0-field=author&size=50&order=-announced_date_first">Advanced Search</a> </div> </div> <input type="hidden" name="order" value="-announced_date_first"> <input type="hidden" name="size" value="50"> </form> <div class="level breathe-horizontal"> <div 
class="level-left"> <form method="GET" action="/search/"> <div style="display: none;"> <select id="searchtype" name="searchtype"><option value="all">All fields</option><option value="title">Title</option><option selected value="author">Author(s)</option><option value="abstract">Abstract</option><option value="comments">Comments</option><option value="journal_ref">Journal reference</option><option value="acm_class">ACM classification</option><option value="msc_class">MSC classification</option><option value="report_num">Report number</option><option value="paper_id">arXiv identifier</option><option value="doi">DOI</option><option value="orcid">ORCID</option><option value="license">License (URI)</option><option value="author_id">arXiv author ID</option><option value="help">Help pages</option><option value="full_text">Full text</option></select> <input id="query" name="query" type="text" value="Xu, D"> <ul id="abstracts"><li><input checked id="abstracts-0" name="abstracts" type="radio" value="show"> <label for="abstracts-0">Show abstracts</label></li><li><input id="abstracts-1" name="abstracts" type="radio" value="hide"> <label for="abstracts-1">Hide abstracts</label></li></ul> </div> <div class="box field is-grouped is-grouped-multiline level-item"> <div class="control"> <span class="select is-small"> <select id="size" name="size"><option value="25">25</option><option selected value="50">50</option><option value="100">100</option><option value="200">200</option></select> </span> <label for="size">results per page</label>. 
</div> <div class="control"> <label for="order">Sort results by</label> <span class="select is-small"> <select id="order" name="order"><option selected value="-announced_date_first">Announcement date (newest first)</option><option value="announced_date_first">Announcement date (oldest first)</option><option value="-submitted_date">Submission date (newest first)</option><option value="submitted_date">Submission date (oldest first)</option><option value="">Relevance</option></select> </span> </div> <div class="control"> <button class="button is-small is-link">Go</button> </div> </div> </form> </div> </div> <nav class="pagination is-small is-centered breathe-horizontal" role="navigation" aria-label="pagination"> <a href="" class="pagination-previous is-invisible">Previous </a> <a href="/search/?searchtype=author&query=Xu%2C+D&start=50" class="pagination-next" >Next </a> <ul class="pagination-list"> <li> <a href="/search/?searchtype=author&query=Xu%2C+D&start=0" class="pagination-link is-current" aria-label="Page 1" aria-current="page">1 </a> </li> <li> <a href="/search/?searchtype=author&query=Xu%2C+D&start=50" class="pagination-link " aria-label="Goto page 2">2 </a> </li> <li> <a href="/search/?searchtype=author&query=Xu%2C+D&start=100" class="pagination-link " aria-label="Goto page 3">3 </a> </li> <li> <a href="/search/?searchtype=author&query=Xu%2C+D&start=150" class="pagination-link " aria-label="Goto page 4">4 </a> </li> </ul> </nav> <ol class="breathe-horizontal" start="1"> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2411.18375">arXiv:2411.18375</a> <span> [<a href="https://arxiv.org/pdf/2411.18375">pdf</a>, <a href="https://arxiv.org/format/2411.18375">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> <span class="tag 
is-small is-grey tooltip is-tooltip-top" data-tooltip="Image and Video Processing">eess.IV</span> </div> </div> <p class="title is-5 mathjax"> Individual Content and Motion Dynamics Preserved Pruning for Video Diffusion Models </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&query=Wu%2C+Y">Yiming Wu</a>, <a href="/search/eess?searchtype=author&query=Wang%2C+H">Huan Wang</a>, <a href="/search/eess?searchtype=author&query=Chen%2C+Z">Zhenghao Chen</a>, <a href="/search/eess?searchtype=author&query=Xu%2C+D">Dong Xu</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2411.18375v1-abstract-short" style="display: inline;"> The high computational cost and slow inference time are major obstacles to deploying the video diffusion model (VDM) in practical applications. To overcome this, we introduce a new Video Diffusion Model Compression approach using individual content and motion dynamics preserved pruning and consistency loss. First, we empirically observe that deeper VDM layers are crucial for maintaining the qualit… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.18375v1-abstract-full').style.display = 'inline'; document.getElementById('2411.18375v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2411.18375v1-abstract-full" style="display: none;"> The high computational cost and slow inference time are major obstacles to deploying the video diffusion model (VDM) in practical applications. To overcome this, we introduce a new Video Diffusion Model Compression approach using individual content and motion dynamics preserved pruning and consistency loss. 
First, we empirically observe that deeper VDM layers are crucial for maintaining the quality of \textbf{motion dynamics} e.g., coherence of the entire video, while shallower layers are more focused on \textbf{individual content} e.g., individual frames. Therefore, we prune redundant blocks from the shallower layers while preserving more of the deeper layers, resulting in a lightweight VDM variant called VDMini. Additionally, we propose an \textbf{Individual Content and Motion Dynamics (ICMD)} Consistency Loss to gain comparable generation performance as larger VDM, i.e., the teacher to VDMini i.e., the student. Particularly, we first use the Individual Content Distillation (ICD) Loss to ensure consistency in the features of each generated frame between the teacher and student models. Next, we introduce a Multi-frame Content Adversarial (MCA) Loss to enhance the motion dynamics across the generated video as a whole. This method significantly accelerates inference time while maintaining high-quality video generation. Extensive experiments demonstrate the effectiveness of our VDMini on two important video generation tasks, Text-to-Video (T2V) and Image-to-Video (I2V), where we respectively achieve an average 2.5 $\times$ and 1.4 $\times$ speed up for the I2V method SF-V and the T2V method T2V-Turbo-v2, while maintaining the quality of the generated videos on two benchmarks, i.e., UCF101 and VBench. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.18375v1-abstract-full').style.display = 'none'; document.getElementById('2411.18375v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 27 November, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> November 2024. 
</p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">9 figures, 9 tables</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2411.08158">arXiv:2411.08158</a> <span> [<a href="https://arxiv.org/pdf/2411.08158">pdf</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Image and Video Processing">eess.IV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> TomoGRAF: A Robust and Generalizable Reconstruction Network for Single-View Computed Tomography </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&query=Xu%2C+D">Di Xu</a>, <a href="/search/eess?searchtype=author&query=Yang%2C+Y">Yang Yang</a>, <a href="/search/eess?searchtype=author&query=Liu%2C+H">Hengjie Liu</a>, <a href="/search/eess?searchtype=author&query=Lyu%2C+Q">Qihui Lyu</a>, <a href="/search/eess?searchtype=author&query=Descovich%2C+M">Martina Descovich</a>, <a href="/search/eess?searchtype=author&query=Ruan%2C+D">Dan Ruan</a>, <a href="/search/eess?searchtype=author&query=Sheng%2C+K">Ke Sheng</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2411.08158v1-abstract-short" style="display: inline;"> Computed tomography (CT) provides high spatial resolution visualization of 3D structures for scientific and clinical applications. Traditional analytical/iterative CT reconstruction algorithms require hundreds of angular data samplings, a condition that may not be met in practice due to physical and mechanical limitations. 
Sparse view CT reconstruction has been proposed using constrained optimizat… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.08158v1-abstract-full').style.display = 'inline'; document.getElementById('2411.08158v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2411.08158v1-abstract-full" style="display: none;"> Computed tomography (CT) provides high spatial resolution visualization of 3D structures for scientific and clinical applications. Traditional analytical/iterative CT reconstruction algorithms require hundreds of angular data samplings, a condition that may not be met in practice due to physical and mechanical limitations. Sparse view CT reconstruction has been proposed using constrained optimization and machine learning methods with varying success, less so for ultra-sparse view CT reconstruction with one to two views. Neural radiance field (NeRF) is a powerful tool for reconstructing and rendering 3D natural scenes from sparse views, but its direct application to 3D medical image reconstruction has been minimally successful due to the differences between optical and X-ray photon transportation. Here, we develop a novel TomoGRAF framework incorporating the unique X-ray transportation physics to reconstruct high-quality 3D volumes using ultra-sparse projections without prior. TomoGRAF captures the CT imaging geometry, simulates the X-ray casting and tracing process, and penalizes the difference between simulated and ground truth CT sub-volume during training. We evaluated the performance of TomoGRAF on an unseen dataset of distinct imaging characteristics from the training data and demonstrated a vast leap in performance compared with state-of-the-art deep learning and NeRF methods. 
TomoGRAF provides the first generalizable solution for image-guided radiotherapy and interventional radiology applications, where only one or a few X-ray views are available, but 3D volumetric information is desired. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.08158v1-abstract-full').style.display = 'none'; document.getElementById('2411.08158v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 12 November, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> November 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2411.01425">arXiv:2411.01425</a> <span> [<a href="https://arxiv.org/pdf/2411.01425">pdf</a>, <a href="https://arxiv.org/format/2411.01425">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Systems and Control">eess.SY</span> </div> </div> <p class="title is-5 mathjax"> Learning Hidden Subgoals under Temporal Ordering Constraints in Reinforcement Learning </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&query=Xu%2C+D">Duo Xu</a>, <a href="/search/eess?searchtype=author&query=Fekri%2C+F">Faramarz Fekri</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2411.01425v1-abstract-short" style="display: inline;"> In real-world applications, the success of completing a task is often determined by multiple key steps 
which are distant in time steps and have to be achieved in a fixed time order. For example, the key steps listed on the cooking recipe should be achieved one-by-one in the right time order. These key steps can be regarded as subgoals of the task and their time orderings are described as temporal… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.01425v1-abstract-full').style.display = 'inline'; document.getElementById('2411.01425v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2411.01425v1-abstract-full" style="display: none;"> In real-world applications, the success of completing a task is often determined by multiple key steps which are distant in time steps and have to be achieved in a fixed time order. For example, the key steps listed on the cooking recipe should be achieved one-by-one in the right time order. These key steps can be regarded as subgoals of the task and their time orderings are described as temporal ordering constraints. However, in many real-world problems, subgoals or key states are often hidden in the state space and their temporal ordering constraints are also unknown, which make it challenging for previous RL algorithms to solve this kind of tasks. In order to address this issue, in this work we propose a novel RL algorithm for {\bf l}earning hidden {\bf s}ubgoals under {\bf t}emporal {\bf o}rdering {\bf c}onstraints (LSTOC). We propose a new contrastive learning objective which can effectively learn hidden subgoals (key states) and their temporal orderings at the same time, based on first-occupancy representation and temporal geometric sampling. In addition, we propose a sample-efficient learning strategy to discover subgoals one-by-one following their temporal order constraints by building a subgoal tree to represent discovered subgoals and their temporal ordering relationships. 
Specifically, this tree can be used to improve the sample efficiency of trajectory collection, fasten the task solving and generalize to unseen tasks. The LSTOC framework is evaluated on several environments with image-based observations, showing its significant improvement over baseline methods. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.01425v1-abstract-full').style.display = 'none'; document.getElementById('2411.01425v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 2 November, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> November 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2410.09686">arXiv:2410.09686</a> <span> [<a href="https://arxiv.org/pdf/2410.09686">pdf</a>, <a href="https://arxiv.org/format/2410.09686">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Systems and Control">eess.SY</span> </div> </div> <p class="title is-5 mathjax"> Generalization of Compositional Tasks with Logical Specification via Implicit Planning </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&query=Xu%2C+D">Duo Xu</a>, <a href="/search/eess?searchtype=author&query=Fekri%2C+F">Faramarz Fekri</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2410.09686v2-abstract-short" style="display: inline;"> In this study, we address the challenge of learning generalizable policies for compositional tasks defined by logical 
specifications. These tasks consist of multiple temporally extended sub-tasks. Due to the sub-task inter-dependencies and sparse reward issue in long-horizon tasks, existing reinforcement learning (RL) approaches, such as task-conditioned and goal-conditioned policies, continue to… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.09686v2-abstract-full').style.display = 'inline'; document.getElementById('2410.09686v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2410.09686v2-abstract-full" style="display: none;"> In this study, we address the challenge of learning generalizable policies for compositional tasks defined by logical specifications. These tasks consist of multiple temporally extended sub-tasks. Due to the sub-task inter-dependencies and sparse reward issue in long-horizon tasks, existing reinforcement learning (RL) approaches, such as task-conditioned and goal-conditioned policies, continue to struggle with slow convergence and sub-optimal performance in generalizing to compositional tasks. To overcome these limitations, we introduce a new hierarchical RL framework that enhances the efficiency and optimality of task generalization. At the high level, we present an implicit planner specifically designed for generalizing compositional tasks. This planner selects the next sub-task and estimates the multi-step return for completing the remaining task to complete from the current state. It learns a latent transition model and performs planning in the latent space by using a graph neural network (GNN). Subsequently, the high-level planner's selected sub-task guides the low-level agent to effectively handle long-horizon tasks, while the multi-step return encourages the low-level policy to account for future sub-task dependencies, enhancing its optimality. 
We conduct comprehensive experiments to demonstrate the framework's advantages over previous methods in terms of both efficiency and optimality. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.09686v2-abstract-full').style.display = 'none'; document.getElementById('2410.09686v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 2 November, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 12 October, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2409.11169">arXiv:2409.11169</a> <span> [<a href="https://arxiv.org/pdf/2409.11169">pdf</a>, <a href="https://arxiv.org/format/2409.11169">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Image and Video Processing">eess.IV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> MAISI: Medical AI for Synthetic Imaging </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&query=Guo%2C+P">Pengfei Guo</a>, <a href="/search/eess?searchtype=author&query=Zhao%2C+C">Can Zhao</a>, <a href="/search/eess?searchtype=author&query=Yang%2C+D">Dong Yang</a>, <a href="/search/eess?searchtype=author&query=Xu%2C+Z">Ziyue Xu</a>, <a href="/search/eess?searchtype=author&query=Nath%2C+V">Vishwesh Nath</a>, <a href="/search/eess?searchtype=author&query=Tang%2C+Y">Yucheng Tang</a>, <a 
href="/search/eess?searchtype=author&query=Simon%2C+B">Benjamin Simon</a>, <a href="/search/eess?searchtype=author&query=Belue%2C+M">Mason Belue</a>, <a href="/search/eess?searchtype=author&query=Harmon%2C+S">Stephanie Harmon</a>, <a href="/search/eess?searchtype=author&query=Turkbey%2C+B">Baris Turkbey</a>, <a href="/search/eess?searchtype=author&query=Xu%2C+D">Daguang Xu</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2409.11169v2-abstract-short" style="display: inline;"> Medical imaging analysis faces challenges such as data scarcity, high annotation costs, and privacy concerns. This paper introduces the Medical AI for Synthetic Imaging (MAISI), an innovative approach using the diffusion model to generate synthetic 3D computed tomography (CT) images to address those challenges. MAISI leverages the foundation volume compression network and the latent diffusion mode… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2409.11169v2-abstract-full').style.display = 'inline'; document.getElementById('2409.11169v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2409.11169v2-abstract-full" style="display: none;"> Medical imaging analysis faces challenges such as data scarcity, high annotation costs, and privacy concerns. This paper introduces the Medical AI for Synthetic Imaging (MAISI), an innovative approach using the diffusion model to generate synthetic 3D computed tomography (CT) images to address those challenges. MAISI leverages the foundation volume compression network and the latent diffusion model to produce high-resolution CT images (up to a landmark volume dimension of 512 x 512 x 768 ) with flexible volume dimensions and voxel spacing. 
By incorporating ControlNet, MAISI can process organ segmentation, including 127 anatomical structures, as additional conditions and enables the generation of accurately annotated synthetic images that can be used for various downstream tasks. Our experiment results show that MAISI's capabilities in generating realistic, anatomically accurate images for diverse regions and conditions reveal its promising potential to mitigate challenges using synthetic data. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2409.11169v2-abstract-full').style.display = 'none'; document.getElementById('2409.11169v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 29 October, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 13 September, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> September 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">WACV25 accepted. 
https://monai.io/research/maisi</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2409.10281">arXiv:2409.10281</a> <span> [<a href="https://arxiv.org/pdf/2409.10281">pdf</a>, <a href="https://arxiv.org/format/2409.10281">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Multimedia">cs.MM</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Sound">cs.SD</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Audio and Speech Processing">eess.AS</span> </div> </div> <p class="title is-5 mathjax"> DreamHead: Learning Spatial-Temporal Correspondence via Hierarchical Diffusion for Audio-driven Talking Head Synthesis </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&query=Hong%2C+F">Fa-Ting Hong</a>, <a href="/search/eess?searchtype=author&query=Liu%2C+Y">Yunfei Liu</a>, <a href="/search/eess?searchtype=author&query=Li%2C+Y">Yu Li</a>, <a href="/search/eess?searchtype=author&query=Zhou%2C+C">Changyin Zhou</a>, <a href="/search/eess?searchtype=author&query=Yu%2C+F">Fei Yu</a>, <a href="/search/eess?searchtype=author&query=Xu%2C+D">Dan Xu</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2409.10281v1-abstract-short" style="display: inline;"> Audio-driven talking head synthesis strives to generate lifelike video portraits from provided audio. The diffusion model, recognized for its superior quality and robust generalization, has been explored for this task. 
However, establishing a robust correspondence between temporal audio cues and corresponding spatial facial expressions with diffusion models remains a significant challenge in talki… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2409.10281v1-abstract-full').style.display = 'inline'; document.getElementById('2409.10281v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2409.10281v1-abstract-full" style="display: none;"> Audio-driven talking head synthesis strives to generate lifelike video portraits from provided audio. The diffusion model, recognized for its superior quality and robust generalization, has been explored for this task. However, establishing a robust correspondence between temporal audio cues and corresponding spatial facial expressions with diffusion models remains a significant challenge in talking head generation. To bridge this gap, we present DreamHead, a hierarchical diffusion framework that learns spatial-temporal correspondences in talking head synthesis without compromising the model's intrinsic quality and adaptability. DreamHead learns to predict dense facial landmarks from audios as intermediate signals to model the spatial and temporal correspondences. Specifically, a first hierarchy of audio-to-landmark diffusion is first designed to predict temporally smooth and accurate landmark sequences given audio sequence signals. Then, a second hierarchy of landmark-to-image diffusion is further proposed to produce spatially consistent facial portrait videos, by modeling spatial correspondences between the dense facial landmark and appearance. Extensive experiments show that proposed DreamHead can effectively learn spatial-temporal consistency with the designed hierarchical diffusion and produce high-fidelity audio-driven talking head videos for multiple identities. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2409.10281v1-abstract-full').style.display = 'none'; document.getElementById('2409.10281v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 16 September, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> September 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2408.15435">arXiv:2408.15435</a> <span> [<a href="https://arxiv.org/pdf/2408.15435">pdf</a>, <a href="https://arxiv.org/format/2408.15435">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Signal Processing">eess.SP</span> </div> </div> <p class="title is-5 mathjax"> Globally Optimal Movable Antenna-Enhanced multi-user Communication: Discrete Antenna Positioning, Motion Power Consumption, and Imperfect CSI </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&query=Wu%2C+Y">Yifei Wu</a>, <a href="/search/eess?searchtype=author&query=Xu%2C+D">Dongfang Xu</a>, <a href="/search/eess?searchtype=author&query=Ng%2C+D+W+K">Derrick Wing Kwan Ng</a>, <a href="/search/eess?searchtype=author&query=Gerstacker%2C+W">Wolfgang Gerstacker</a>, <a href="/search/eess?searchtype=author&query=Schober%2C+R">Robert Schober</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2408.15435v1-abstract-short" style="display: inline;"> Movable antennas (MAs) represent a promising paradigm to enhance the spatial degrees of freedom of conventional multi-antenna systems by dynamically adapting the positions of antenna elements within a designated transmit area. 
In particular, by employing electro-mechanical MA drivers, the positions of the MA elements can be adjusted to shape a favorable spatial correlation for improving system per… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2408.15435v1-abstract-full').style.display = 'inline'; document.getElementById('2408.15435v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2408.15435v1-abstract-full" style="display: none;"> Movable antennas (MAs) represent a promising paradigm to enhance the spatial degrees of freedom of conventional multi-antenna systems by dynamically adapting the positions of antenna elements within a designated transmit area. In particular, by employing electro-mechanical MA drivers, the positions of the MA elements can be adjusted to shape a favorable spatial correlation for improving system performance. Although preliminary research has explored beamforming designs for MA systems, the intricacies of the power consumption and the precise positioning of MA elements are not well understood. Moreover, the assumption of perfect CSI adopted in the literature is impractical due to the significant pilot overhead and the extensive time to acquire perfect CSI. To address these challenges, we model the motion of MA elements through discrete steps and quantify the associated power consumption as a function of these movements. Furthermore, by leveraging the properties of the MA channel model, we introduce a novel CSI error model tailored for MA systems that facilitates robust resource allocation design. In particular, we optimize the beamforming and the MA positions at the BS to minimize the total BS power consumption, encompassing both radiated and MA motion power while guaranteeing a minimum required SINR for each user. 
To this end, novel algorithms exploiting the branch and bound (BnB) method are developed to obtain the optimal solution for perfect and imperfect CSI. Moreover, to support practical implementation, we propose low-complexity algorithms with guaranteed convergence by leveraging successive convex approximation (SCA). Our numerical results validate the optimality of the proposed BnB-based algorithms. Furthermore, we unveil that both proposed SCA-based algorithms approach the optimal performance within a few iterations, thus highlighting their practical advantages. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2408.15435v1-abstract-full').style.display = 'none'; document.getElementById('2408.15435v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 27 August, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> August 2024. 
</p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2408.13829">arXiv:2408.13829</a> <span> [<a href="https://arxiv.org/pdf/2408.13829">pdf</a>, <a href="https://arxiv.org/ps/2408.13829">ps</a>, <a href="https://arxiv.org/format/2408.13829">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Signal Processing">eess.SP</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Systems and Control">eess.SY</span> </div> </div> <p class="title is-5 mathjax"> Sensing-aided Near-Field Secure Communications with Mobile Eavesdroppers </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&query=Xu%2C+Y">Yiming Xu</a>, <a href="/search/eess?searchtype=author&query=Zheng%2C+M">Mingxuan Zheng</a>, <a href="/search/eess?searchtype=author&query=Xu%2C+D">Dongfang Xu</a>, <a href="/search/eess?searchtype=author&query=Song%2C+S">Shenghui Song</a>, <a href="/search/eess?searchtype=author&query=da+Costa%2C+D+B">Daniel Benevides da Costa</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2408.13829v1-abstract-short" style="display: inline;"> The additional degree of freedom (DoF) in the distance domain of near-field communication offers new opportunities for physical layer security (PLS) design. However, existing works mainly consider static eavesdroppers, and the related study with mobile eavesdroppers is still in its infancy due to the difficulty in obtaining the channel state information (CSI) of the eavesdropper. 
To this end, we p… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2408.13829v1-abstract-full').style.display = 'inline'; document.getElementById('2408.13829v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2408.13829v1-abstract-full" style="display: none;"> The additional degree of freedom (DoF) in the distance domain of near-field communication offers new opportunities for physical layer security (PLS) design. However, existing works mainly consider static eavesdroppers, and the related study with mobile eavesdroppers is still in its infancy due to the difficulty in obtaining the channel state information (CSI) of the eavesdropper. To this end, we propose to leverage the sensing capability of integrated sensing and communication (ISAC) systems to assist PLS design. To comprehensively study the dynamic behaviors of the system, we propose a Pareto optimization framework, where a multi-objective optimization problem (MOOP) is formulated to simultaneously optimize three key performance metrics: power consumption, number of securely served users, and tracking performance, while guaranteeing the achievable rate of the users with a given leakage rate constraint. A globally optimal design based on the generalized Benders decomposition (GBD) method is proposed to achieve the Pareto optimal solutions. To reduce the computational complexity, we further design a low-complexity algorithm based on zero-forcing (ZF) beamforming and successive convex approximation (SCA). Simulation results validate the effectiveness of the proposed algorithms and reveal the intrinsic trade-offs between the three performance metrics. It is observed that near-field communication offers a favorable beam diffraction effect for PLS, where the energy of the information signal is nulled around the eavesdropper and focused on the users. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2408.13829v1-abstract-full').style.display = 'none'; document.getElementById('2408.13829v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 25 August, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> August 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2408.10561">arXiv:2408.10561</a> <span> [<a href="https://arxiv.org/pdf/2408.10561">pdf</a>, <a href="https://arxiv.org/format/2408.10561">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Sound">cs.SD</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Audio and Speech Processing">eess.AS</span> </div> </div> <p class="title is-5 mathjax"> ICSD: An Open-source Dataset for Infant Cry and Snoring Detection </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&query=Liu%2C+Q">Qingyu Liu</a>, <a href="/search/eess?searchtype=author&query=Song%2C+L">Longfei Song</a>, <a href="/search/eess?searchtype=author&query=Xu%2C+D">Dongxing Xu</a>, <a href="/search/eess?searchtype=author&query=Long%2C+Y">Yanhua Long</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2408.10561v1-abstract-short" style="display: inline;"> The detection and analysis of infant cry and snoring events are crucial tasks within the field of audio signal processing. While existing datasets for general sound event detection are plentiful, they often fall short in providing sufficient, strongly labeled data specific to infant cries and snoring. 
To provide a benchmark dataset and thus foster the research of infant cry and snoring detection,… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2408.10561v1-abstract-full').style.display = 'inline'; document.getElementById('2408.10561v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2408.10561v1-abstract-full" style="display: none;"> The detection and analysis of infant cry and snoring events are crucial tasks within the field of audio signal processing. While existing datasets for general sound event detection are plentiful, they often fall short in providing sufficient, strongly labeled data specific to infant cries and snoring. To provide a benchmark dataset and thus foster the research of infant cry and snoring detection, this paper introduces the Infant Cry and Snoring Detection (ICSD) dataset, a novel, publicly available dataset specially designed for ICSD tasks. The ICSD comprises three types of subsets: a real strongly labeled subset with event-based labels annotated manually, a weakly labeled subset with only clip-level event annotations, and a synthetic subset generated and labeled with strong annotations. This paper provides a detailed description of the ICSD creation process, including the challenges encountered and the solutions adopted. We offer a comprehensive characterization of the dataset, discussing its limitations and key factors for ICSD usage. Additionally, we conduct extensive experiments on the ICSD dataset to establish baseline systems and offer insights into the main factors when using this dataset for ICSD research. Our goal is to develop a dataset that will be widely adopted by the community as a new open benchmark for future ICSD research. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2408.10561v1-abstract-full').style.display = 'none'; document.getElementById('2408.10561v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 20 August, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> August 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">11 pages, 6 figures</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2407.17841">arXiv:2407.17841</a> <span> [<a href="https://arxiv.org/pdf/2407.17841">pdf</a>, <a href="https://arxiv.org/ps/2407.17841">ps</a>, <a href="https://arxiv.org/format/2407.17841">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Information Theory">cs.IT</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Signal Processing">eess.SP</span> </div> </div> <p class="title is-5 mathjax"> Two-Timescale Design for Movable Antenna Array-Enabled Multiuser Uplink Communications </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&query=Hu%2C+G">Guojie Hu</a>, <a href="/search/eess?searchtype=author&query=Wu%2C+Q">Qingqing Wu</a>, <a href="/search/eess?searchtype=author&query=Xu%2C+D">Donghui Xu</a>, <a href="/search/eess?searchtype=author&query=Xu%2C+K">Kui Xu</a>, <a href="/search/eess?searchtype=author&query=Si%2C+J">Jiangbo Si</a>, <a href="/search/eess?searchtype=author&query=Cai%2C+Y">Yunlong Cai</a>, <a href="/search/eess?searchtype=author&query=Al-Dhahir%2C+N">Naofal Al-Dhahir</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis 
has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2407.17841v1-abstract-short" style="display: inline;"> Movable antenna (MA) technology can flexibly reconfigure wireless channels by adjusting antenna positions in a local region, thus showing great potential for enhancing communication performance. This letter investigates MA technology enabled multiuser uplink communications over general Rician fading channels, which consist of a base station (BS) equipped with the MA array and multiple single-antenna… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2407.17841v1-abstract-full').style.display = 'inline'; document.getElementById('2407.17841v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2407.17841v1-abstract-full" style="display: none;"> Movable antenna (MA) technology can flexibly reconfigure wireless channels by adjusting antenna positions in a local region, thus showing great potential for enhancing communication performance. This letter investigates MA technology enabled multiuser uplink communications over general Rician fading channels, which consist of a base station (BS) equipped with the MA array and multiple single-antenna users. Since it is practically challenging to collect all instantaneous channel state information (CSI) by traversing all possible antenna positions at the BS, we instead propose a two-timescale scheme for maximizing the ergodic sum rate. Specifically, antenna positions at the BS are first optimized using only the statistical CSI. 
Subsequently, the receiving beamforming at the BS (for which we consider the three typical zero-forcing (ZF), minimum mean-square error (MMSE) and MMSE with successive interference cancellation (MMSE-SIC) receivers) is designed based on the instantaneous CSI with optimized antenna positions, thus significantly reducing practical implementation complexities. The formulated problems are highly non-convex and we develop projected gradient ascent (PGA) algorithms to effectively handle them. Simulation results illustrate that compared to conventional fixed-position antenna (FPA) array, the MA array can achieve significant performance gains by reaping an additional spatial degree of freedom. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2407.17841v1-abstract-full').style.display = 'none'; document.getElementById('2407.17841v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 25 July, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> July 2024. 
</p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2407.07615">arXiv:2407.07615</a> <span> [<a href="https://arxiv.org/pdf/2407.07615">pdf</a>, <a href="https://arxiv.org/format/2407.07615">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Optimization and Control">math.OC</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Systems and Control">eess.SY</span> </div> </div> <p class="title is-5 mathjax"> Finite Control Set Model Predictive Control with Limit Cycle Stability Guarantees </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&query=Xu%2C+D">Duo Xu</a>, <a href="/search/eess?searchtype=author&query=Lazar%2C+M">Mircea Lazar</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2407.07615v1-abstract-short" style="display: inline;"> This paper considers the design of finite control set model predictive control (FCS-MPC) for discrete-time switched affine systems. Existing FCS-MPC methods typically pursue practical stability guarantees, which ensure convergence to a bounded invariant set that contains a desired steady state. As such, current FCS-MPC methods result in unpredictable steady-state behavior due to arbitrary switchin… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2407.07615v1-abstract-full').style.display = 'inline'; document.getElementById('2407.07615v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2407.07615v1-abstract-full" style="display: none;"> This paper considers the design of finite control set model predictive control (FCS-MPC) for discrete-time switched affine systems. 
Existing FCS-MPC methods typically pursue practical stability guarantees, which ensure convergence to a bounded invariant set that contains a desired steady state. As such, current FCS-MPC methods result in unpredictable steady-state behavior due to arbitrary switching among the available finite control inputs. Motivated by this, we present an FCS-MPC design that aims to stabilize a steady-state limit cycle compatible with a desired output reference via a suitable cost function. We provide conditions in terms of periodic terminal costs and finite control set control laws that guarantee asymptotic stability of the developed limit cycle FCS-MPC algorithm. Moreover, we develop conditions for recursive feasibility of limit cycle FCS-MPC in terms of periodic terminal sets and we provide systematic methods for computing ellipsoidal and polytopic periodically invariant sets that contain a desired steady-state limit cycle. Compared to existing periodic terminal ingredients for tracking MPC with a continuous control set, we design and compute terminal ingredients using a finite control set. The developed methodology is validated on switched systems and power electronics benchmark examples. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2407.07615v1-abstract-full').style.display = 'none'; document.getElementById('2407.07615v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 10 July, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> July 2024. 
</p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2407.03307">arXiv:2407.03307</a> <span> [<a href="https://arxiv.org/pdf/2407.03307">pdf</a>, <a href="https://arxiv.org/format/2407.03307">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Image and Video Processing">eess.IV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> HoloHisto: End-to-end Gigapixel WSI Segmentation with 4K Resolution Sequential Tokenization </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&query=Tang%2C+Y">Yucheng Tang</a>, <a href="/search/eess?searchtype=author&query=He%2C+Y">Yufan He</a>, <a href="/search/eess?searchtype=author&query=Nath%2C+V">Vishwesh Nath</a>, <a href="/search/eess?searchtype=author&query=Guo%2C+P">Pengfeig Guo</a>, <a href="/search/eess?searchtype=author&query=Deng%2C+R">Ruining Deng</a>, <a href="/search/eess?searchtype=author&query=Yao%2C+T">Tianyuan Yao</a>, <a href="/search/eess?searchtype=author&query=Liu%2C+Q">Quan Liu</a>, <a href="/search/eess?searchtype=author&query=Cui%2C+C">Can Cui</a>, <a href="/search/eess?searchtype=author&query=Yin%2C+M">Mengmeng Yin</a>, <a href="/search/eess?searchtype=author&query=Xu%2C+Z">Ziyue Xu</a>, <a href="/search/eess?searchtype=author&query=Roth%2C+H">Holger Roth</a>, <a href="/search/eess?searchtype=author&query=Xu%2C+D">Daguang Xu</a>, <a href="/search/eess?searchtype=author&query=Yang%2C+H">Haichun Yang</a>, <a href="/search/eess?searchtype=author&query=Huo%2C+Y">Yuankai Huo</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2407.03307v1-abstract-short" 
style="display: inline;"> In digital pathology, the traditional method for deep learning-based image segmentation typically involves a two-stage process: initially segmenting high-resolution whole slide images (WSI) into smaller patches (e.g., 256x256, 512x512, 1024x1024) and subsequently reconstructing them to their original scale. This method often struggles to capture the complex details and vast scope of WSIs. In this… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2407.03307v1-abstract-full').style.display = 'inline'; document.getElementById('2407.03307v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2407.03307v1-abstract-full" style="display: none;"> In digital pathology, the traditional method for deep learning-based image segmentation typically involves a two-stage process: initially segmenting high-resolution whole slide images (WSI) into smaller patches (e.g., 256x256, 512x512, 1024x1024) and subsequently reconstructing them to their original scale. This method often struggles to capture the complex details and vast scope of WSIs. In this paper, we propose the holistic histopathology (HoloHisto) segmentation method to achieve end-to-end segmentation on gigapixel WSIs, whose maximum resolution is above 80,000$\times$70,000 pixels. HoloHisto fundamentally shifts the paradigm of WSI segmentation to an end-to-end learning fashion with 1) a large (4K) resolution base patch for elevated visual information inclusion and efficient processing, and 2) a novel sequential tokenization mechanism to properly model the contextual relationships and efficiently model the rich information from the 4K input. To our best knowledge, HoloHisto presents the first holistic approach for gigapixel resolution WSI segmentation, supporting direct I/O of complete WSI and their corresponding gigapixel masks. 
Under the HoloHisto platform, we unveil a random 4K sampler that transcends ultra-high resolution, delivering 31 and 10 times more pixels than standard 2D and 3D patches, respectively, for advancing computational capabilities. To facilitate efficient 4K resolution dense prediction, we leverage sequential tokenization, utilizing a pre-trained image tokenizer to group image features into a discrete token grid. To assess the performance, our team curated a new kidney pathology image segmentation (KPIs) dataset with WSI-level glomeruli segmentation from whole mouse kidneys. From the results, HoloHisto-4K delivers remarkable performance gains over previous state-of-the-art models. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2407.03307v1-abstract-full').style.display = 'none'; document.getElementById('2407.03307v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 3 July, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> July 2024. 
</p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2407.02877">arXiv:2407.02877</a> <span> [<a href="https://arxiv.org/pdf/2407.02877">pdf</a>, <a href="https://arxiv.org/format/2407.02877">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Information Theory">cs.IT</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Signal Processing">eess.SP</span> </div> </div> <p class="title is-5 mathjax"> Resource Allocation Design for Next-Generation Multiple Access: A Tutorial Overview </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&query=Wei%2C+Z">Zhiqiang Wei</a>, <a href="/search/eess?searchtype=author&query=Xu%2C+D">Dongfang Xu</a>, <a href="/search/eess?searchtype=author&query=Li%2C+S">Shuangyang Li</a>, <a href="/search/eess?searchtype=author&query=Song%2C+S">Shenghui Song</a>, <a href="/search/eess?searchtype=author&query=Ng%2C+D+W+K">Derrick Wing Kwan Ng</a>, <a href="/search/eess?searchtype=author&query=Caire%2C+G">Giuseppe Caire</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2407.02877v1-abstract-short" style="display: inline;"> Multiple access is the cornerstone technology for each generation of wireless cellular networks and resource allocation design plays a crucial role in multiple access. In this paper, we present a comprehensive tutorial overview for junior researchers in this field, aiming to offer a foundational guide for resource allocation design in the context of next-generation multiple access (NGMA). 
Initiall… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2407.02877v1-abstract-full').style.display = 'inline'; document.getElementById('2407.02877v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2407.02877v1-abstract-full" style="display: none;"> Multiple access is the cornerstone technology for each generation of wireless cellular networks and resource allocation design plays a crucial role in multiple access. In this paper, we present a comprehensive tutorial overview for junior researchers in this field, aiming to offer a foundational guide for resource allocation design in the context of next-generation multiple access (NGMA). Initially, we identify three types of channels in future wireless cellular networks over which NGMA will be implemented, namely: natural channels, reconfigurable channels, and functional channels. Natural channels are traditional uplink and downlink communication channels; reconfigurable channels are defined as channels that can be proactively reshaped via emerging platforms or techniques, such as intelligent reflecting surface (IRS), unmanned aerial vehicle (UAV), and movable/fluid antenna (M/FA); and functional channels support not only communication but also other functionalities simultaneously, with typical examples including integrated sensing and communication (ISAC) and joint computing and communication (JCAC) channels. Then, we introduce NGMA models applicable to these three types of channels that cover most of the practical communication scenarios of future wireless communications. Subsequently, we articulate the key optimization technical challenges inherent in the resource allocation design for NGMA, categorizing them into rate-oriented, power-oriented, and reliability-oriented resource allocation designs. 
The corresponding optimization approaches for solving the formulated resource allocation design problems are then presented. Finally, simulation results are presented and discussed to elucidate the practical implications and insights derived from resource allocation designs in NGMA. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2407.02877v1-abstract-full').style.display = 'none'; document.getElementById('2407.02877v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 3 July, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> July 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">69 pages, 10 figures, 5 tables</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2406.16888">arXiv:2406.16888</a> <span> [<a href="https://arxiv.org/pdf/2406.16888">pdf</a>, <a href="https://arxiv.org/format/2406.16888">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Signal Processing">eess.SP</span> </div> </div> <p class="title is-5 mathjax"> Efficient UAV Hovering, Resource Allocation, and Trajectory Design for ISAC with Limited Backhaul Capacity </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&query=Khalili%2C+A">Ata Khalili</a>, <a href="/search/eess?searchtype=author&query=Rezaei%2C+A">Atefeh Rezaei</a>, <a href="/search/eess?searchtype=author&query=Xu%2C+D">Dongfang Xu</a>, <a href="/search/eess?searchtype=author&query=Dressler%2C+F">Falko Dressler</a>, <a href="/search/eess?searchtype=author&query=Schober%2C+R">Robert Schober</a> </p> <p class="abstract 
mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2406.16888v2-abstract-short" style="display: inline;"> In this paper, we investigate the joint resource allocation and trajectory design for a multi-user, multi-target unmanned aerial vehicle (UAV)-enabled integrated sensing and communication (ISAC) system, where the link capacity between a ground base station (BS) and the UAV is limited. The UAV conducts target sensing and information transmission in orthogonal time slots to prevent interference. As… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2406.16888v2-abstract-full').style.display = 'inline'; document.getElementById('2406.16888v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2406.16888v2-abstract-full" style="display: none;"> In this paper, we investigate the joint resource allocation and trajectory design for a multi-user, multi-target unmanned aerial vehicle (UAV)-enabled integrated sensing and communication (ISAC) system, where the link capacity between a ground base station (BS) and the UAV is limited. The UAV conducts target sensing and information transmission in orthogonal time slots to prevent interference. As is common in practical systems, sensing is performed while the UAV hovers, allowing the UAV to acquire high-quality sensing data. Subsequently, the acquired sensing data is offloaded to the ground BS for further processing. We jointly optimize the UAV trajectory, UAV velocity, beamforming for the communication users, power allocated to the sensing beam, and time of hovering for sensing to minimize the power consumption of the UAV while ensuring the communication quality of service (QoS) and successful sensing. 
Due to the prohibitively high complexity of the resulting non-convex mixed integer non-linear program (MINLP), we employ a series of transformations and optimization techniques, including semidefinite relaxation, big-M method, penalty approach, and successive convex approximation, to obtain a low-complexity suboptimal solution. Our simulation results reveal that 1) the proposed design achieves significant power savings compared to two baseline schemes; 2) stricter sensing requirements lead to longer sensing times, highlighting the challenge of efficiently managing both sensing accuracy and sensing time; 3) the optimized trajectory design ensures precise hovering directly above the targets during sensing, enhancing sensing quality and enabling the application of energy-focused beams; and 4) the proposed trajectory design balances the capacity of the backhaul link and the downlink rate of the communication users. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2406.16888v2-abstract-full').style.display = 'none'; document.getElementById('2406.16888v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 2 September, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 30 April, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> June 2024. 
</p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">This paper is accepted by IEEE Transactions on Wireless Communications</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2406.10897">arXiv:2406.10897</a> <span> [<a href="https://arxiv.org/pdf/2406.10897">pdf</a>, <a href="https://arxiv.org/ps/2406.10897">ps</a>, <a href="https://arxiv.org/format/2406.10897">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Signal Processing">eess.SP</span> </div> </div> <p class="title is-5 mathjax"> When NOMA Meets AIGC: Enhanced Wireless Federated Learning </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&query=Xu%2C+D">Ding Xu</a>, <a href="/search/eess?searchtype=author&query=Duan%2C+L">Lingjie Duan</a>, <a href="/search/eess?searchtype=author&query=Zhu%2C+H">Hongbo Zhu</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2406.10897v1-abstract-short" style="display: inline;"> Wireless federated learning (WFL) enables devices to collaboratively train a global model via local model training, uploading and aggregating. However, WFL faces the data scarcity/heterogeneity problem (i.e., data are limited and unevenly distributed among devices) that degrades the learning performance. 
In this regard, artificial intelligence generated content (AIGC) can synthesize various types… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2406.10897v1-abstract-full').style.display = 'inline'; document.getElementById('2406.10897v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2406.10897v1-abstract-full" style="display: none;"> Wireless federated learning (WFL) enables devices to collaboratively train a global model via local model training, uploading and aggregating. However, WFL faces the data scarcity/heterogeneity problem (i.e., data are limited and unevenly distributed among devices) that degrades the learning performance. In this regard, artificial intelligence generated content (AIGC) can synthesize various types of data to compensate for the insufficient local data. Nevertheless, downloading synthetic data or uploading local models iteratively takes a lot of time, especially for a large amount of devices. To address this issue, we propose to leverage non-orthogonal multiple access (NOMA) to achieve efficient synthetic data and local model transmission. This paper is the first to combine AIGC and NOMA with WFL to maximally enhance the learning performance. For the proposed NOMA+AIGC-enhanced WFL, the problem of jointly optimizing the synthetic data distribution, two-way communication and computation resource allocation to minimize the global learning error is investigated. The problem belongs to NP-hard mixed integer nonlinear programming, whose optimal solution is intractable to find. We first employ the block coordinate descent method to decouple the complicated-coupled variables, and then resort to our analytical method to derive an efficient low-complexity local optimal solution with partial closed-form results. 
Extensive simulations validate the superiority of the proposed scheme compared to the existing and benchmark schemes such as the frequency/time division multiple access based AIGC-enhanced schemes. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2406.10897v1-abstract-full').style.display = 'none'; document.getElementById('2406.10897v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 16 June, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> June 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">13 pages, submitted to IEEE TWC for possible publication</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2406.10895">arXiv:2406.10895</a> <span> [<a href="https://arxiv.org/pdf/2406.10895">pdf</a>, <a href="https://arxiv.org/ps/2406.10895">ps</a>, <a href="https://arxiv.org/format/2406.10895">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Signal Processing">eess.SP</span> </div> </div> <p class="title is-5 mathjax"> Fair Computation Offloading for RSMA-Assisted Mobile Edge Computing Networks </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&query=Xu%2C+D">Ding Xu</a>, <a href="/search/eess?searchtype=author&query=Duan%2C+L">Lingjie Duan</a>, <a href="/search/eess?searchtype=author&query=Zhao%2C+H">Haitao Zhao</a>, <a href="/search/eess?searchtype=author&query=Zhu%2C+H">Hongbo Zhu</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" 
id="2406.10895v2-abstract-short" style="display: inline;"> Rate splitting multiple access (RSMA) provides a flexible transmission framework that can be applied in mobile edge computing (MEC) systems. However, the research work on RSMA-assisted MEC systems is still at the infancy and many design issues remain unsolved, such as the MEC server and channel allocation problem in general multi-server and multi-channel scenarios as well as the user fairness issu… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2406.10895v2-abstract-full').style.display = 'inline'; document.getElementById('2406.10895v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2406.10895v2-abstract-full" style="display: none;"> Rate splitting multiple access (RSMA) provides a flexible transmission framework that can be applied in mobile edge computing (MEC) systems. However, the research work on RSMA-assisted MEC systems is still at the infancy and many design issues remain unsolved, such as the MEC server and channel allocation problem in general multi-server and multi-channel scenarios as well as the user fairness issues. In this regard, we study an RSMA-assisted MEC system with multiple MEC servers, channels and devices, and consider the fairness among devices. A max-min fairness computation offloading problem to maximize the minimum computation offloading rate is investigated. Since the problem is difficult to solve optimally, we develop an efficient algorithm to obtain a suboptimal solution. 
Particularly, the time allocation and the computing frequency allocation are derived as closed-form functions of the transmit power allocation and the successive interference cancellation (SIC) decoding order, while the transmit power allocation and the SIC decoding order are jointly optimized via the alternating optimization method, the bisection search method and the successive convex approximation method. For the channel and MEC server allocation problem, we transform it into a hypergraph matching problem and solve it by matching theory. Simulation results demonstrate that the proposed RSMA-assisted MEC system outperforms current MEC systems under various system setups. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2406.10895v2-abstract-full').style.display = 'none'; document.getElementById('2406.10895v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 1 August, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 16 June, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> June 2024. 
</p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2405.12357">arXiv:2405.12357</a> <span> [<a href="https://arxiv.org/pdf/2405.12357">pdf</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Image and Video Processing">eess.IV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> Paired Conditional Generative Adversarial Network for Highly Accelerated Liver 4D MRI </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&query=Xu%2C+D">Di Xu</a>, <a href="/search/eess?searchtype=author&query=Miao%2C+X">Xin Miao</a>, <a href="/search/eess?searchtype=author&query=Liu%2C+H">Hengjie Liu</a>, <a href="/search/eess?searchtype=author&query=Scholey%2C+J+E">Jessica E. Scholey</a>, <a href="/search/eess?searchtype=author&query=Yang%2C+W">Wensha Yang</a>, <a href="/search/eess?searchtype=author&query=Feng%2C+M">Mary Feng</a>, <a href="/search/eess?searchtype=author&query=Ohliger%2C+M">Michael Ohliger</a>, <a href="/search/eess?searchtype=author&query=Lin%2C+H">Hui Lin</a>, <a href="/search/eess?searchtype=author&query=Lao%2C+Y">Yi Lao</a>, <a href="/search/eess?searchtype=author&query=Yang%2C+Y">Yang Yang</a>, <a href="/search/eess?searchtype=author&query=Sheng%2C+K">Ke Sheng</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2405.12357v1-abstract-short" style="display: inline;"> Purpose: 4D MRI with high spatiotemporal resolution is desired for image-guided liver radiotherapy. Acquiring densely sampling k-space data is time-consuming. 
Accelerated acquisition with sparse samples is desirable but often causes degraded image quality or long reconstruction time. We propose the Reconstruct Paired Conditional Generative Adversarial Network (Re-Con-GAN) to shorten the 4D MRI rec… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2405.12357v1-abstract-full').style.display = 'inline'; document.getElementById('2405.12357v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2405.12357v1-abstract-full" style="display: none;"> Purpose: 4D MRI with high spatiotemporal resolution is desired for image-guided liver radiotherapy. Acquiring densely sampling k-space data is time-consuming. Accelerated acquisition with sparse samples is desirable but often causes degraded image quality or long reconstruction time. We propose the Reconstruct Paired Conditional Generative Adversarial Network (Re-Con-GAN) to shorten the 4D MRI reconstruction time while maintaining the reconstruction quality. Methods: Patients who underwent free-breathing liver 4D MRI were included in the study. Fully- and retrospectively under-sampled data at 3, 6 and 10 times (3x, 6x and 10x) were first reconstructed using the nuFFT algorithm. Re-Con-GAN then trained input and output in pairs. Three types of networks, ResNet9, UNet and reconstruction swin transformer, were explored as generators. PatchGAN was selected as the discriminator. Re-Con-GAN processed the data (3D+t) as temporal slices (2D+t). A total of 48 patients with 12332 temporal slices were split into training (37 patients with 10721 slices) and test (11 patients with 1611 slices). Results: Re-Con-GAN consistently achieved comparable/better PSNR, SSIM, and RMSE scores compared to CS/UNet models. The inference time of Re-Con-GAN, UNet and CS are 0.15s, 0.16s, and 120s. 
The GTV detection task showed that Re-Con-GAN and CS, compared to UNet, better improved the dice score (3x Re-Con-GAN 80.98%; 3x CS 80.74%; 3x UNet 79.88%) of unprocessed under-sampled images (3x 69.61%). Conclusion: A generative network with adversarial training is proposed with promising and efficient reconstruction results demonstrated on an in-house dataset. The rapid and qualitative reconstruction of 4D liver MR has the potential to facilitate online adaptive MR-guided radiotherapy for liver cancer. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2405.12357v1-abstract-full').style.display = 'none'; document.getElementById('2405.12357v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 20 May, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> May 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2405.11093">arXiv:2405.11093</a> <span> [<a href="https://arxiv.org/pdf/2405.11093">pdf</a>, <a href="https://arxiv.org/format/2405.11093">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Audio and Speech Processing">eess.AS</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Multimedia">cs.MM</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Sound">cs.SD</span> </div> </div> <p class="title is-5 mathjax"> AudioSetMix: Enhancing Audio-Language Datasets with LLM-Assisted Augmentations </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&query=Xu%2C+D">David Xu</a> </p> <p class="abstract mathjax"> 
<span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2405.11093v2-abstract-short" style="display: inline;"> Multi-modal learning in the audio-language domain has seen significant advancements in recent years. However, audio-language learning faces challenges due to limited and lower-quality data compared to image-language tasks. Existing audio-language datasets are notably smaller, and manual labeling is hindered by the need to listen to entire audio clips for accurate labeling. Our method systematica… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2405.11093v2-abstract-full').style.display = 'inline'; document.getElementById('2405.11093v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2405.11093v2-abstract-full" style="display: none;"> Multi-modal learning in the audio-language domain has seen significant advancements in recent years. However, audio-language learning faces challenges due to limited and lower-quality data compared to image-language tasks. Existing audio-language datasets are notably smaller, and manual labeling is hindered by the need to listen to entire audio clips for accurate labeling. Our method systematically generates audio-caption pairs by augmenting audio clips with natural language labels and corresponding audio signal processing operations. Leveraging a Large Language Model, we generate descriptions of augmented audio clips with a prompt template. This scalable method produces AudioSetMix, a high-quality training dataset for text-and-audio related models. Integration of our dataset improves models performance on benchmarks by providing diversified and better-aligned examples. Notably, our dataset addresses the absence of modifiers (adjectives and adverbs) in existing datasets. 
By enabling models to learn these concepts, and generating hard negative examples during training, we achieve state-of-the-art performance on multiple benchmarks. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2405.11093v2-abstract-full').style.display = 'none'; document.getElementById('2405.11093v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 7 June, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 17 May, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> May 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">typos corrected</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2405.04274">arXiv:2405.04274</a> <span> [<a href="https://arxiv.org/pdf/2405.04274">pdf</a>, <a href="https://arxiv.org/format/2405.04274">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Image and Video Processing">eess.IV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> <div class="is-inline-block" style="margin-left: 0.5rem"> <div class="tags has-addons"> <span class="tag is-dark is-size-7">doi</span> <span class="tag is-light is-size-7"><a class="" href="https://doi.org/10.1145/3664647.3680943">10.1145/3664647.3680943 <i class="fa fa-external-link" aria-hidden="true"></i></a></span> </div> </div> </div> <p class="title is-5 mathjax"> Group-aware Parameter-efficient Updating for Content-Adaptive Neural Video Compression </p> <p class="authors"> <span class="search-hit">Authors:</span> <a 
href="/search/eess?searchtype=author&query=Chen%2C+Z">Zhenghao Chen</a>, <a href="/search/eess?searchtype=author&query=Zhou%2C+L">Luping Zhou</a>, <a href="/search/eess?searchtype=author&query=Hu%2C+Z">Zhihao Hu</a>, <a href="/search/eess?searchtype=author&query=Xu%2C+D">Dong Xu</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2405.04274v2-abstract-short" style="display: inline;"> Content-adaptive compression is crucial for enhancing the adaptability of the pre-trained neural codec for various contents. Although these methods have been very practical in neural image compression (NIC), their application in neural video compression (NVC) is still limited due to two main aspects: 1), video compression relies heavily on temporal redundancy, therefore updating just one or a few… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2405.04274v2-abstract-full').style.display = 'inline'; document.getElementById('2405.04274v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2405.04274v2-abstract-full" style="display: none;"> Content-adaptive compression is crucial for enhancing the adaptability of the pre-trained neural codec for various contents. Although these methods have been very practical in neural image compression (NIC), their application in neural video compression (NVC) is still limited due to two main aspects: 1), video compression relies heavily on temporal redundancy, therefore updating just one or a few frames can lead to significant errors accumulating over time; 2), NVC frameworks are generally more complex, with many large components that are not easy to update quickly during encoding. 
To address the previously mentioned challenges, we have developed a content-adaptive NVC technique called Group-aware Parameter-Efficient Updating (GPU). Initially, to minimize error accumulation, we adopt a group-aware approach for updating encoder parameters. This involves adopting a patch-based Group of Pictures (GoP) training strategy to segment a video into patch-based GoPs, which will be updated to facilitate a globally optimized domain-transferable solution. Subsequently, we introduce a parameter-efficient delta-tuning strategy, which is achieved by integrating several light-weight adapters into each coding component of the encoding process by both serial and parallel configuration. Such architecture-agnostic modules stimulate the components with large parameters, thereby reducing both the update cost and the encoding time. We incorporate our GPU into the latest NVC framework and conduct comprehensive experiments, whose results showcase outstanding video compression efficiency across four video benchmarks and adaptability of one medical image benchmark. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2405.04274v2-abstract-full').style.display = 'none'; document.getElementById('2405.04274v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 4 September, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 7 May, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> May 2024. 
</p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Accepted by ACM MM 2024, Melbourne, Australia</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2405.00316">arXiv:2405.00316</a> <span> [<a href="https://arxiv.org/pdf/2405.00316">pdf</a>, <a href="https://arxiv.org/format/2405.00316">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Robotics">cs.RO</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Systems and Control">eess.SY</span> </div> </div> <p class="title is-5 mathjax"> Enhance Planning with Physics-informed Safety Controller for End-to-end Autonomous Driving </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&query=Zhou%2C+H">Hang Zhou</a>, <a href="/search/eess?searchtype=author&query=Liu%2C+H">Haichao Liu</a>, <a href="/search/eess?searchtype=author&query=Lu%2C+H">Hongliang Lu</a>, <a href="/search/eess?searchtype=author&query=Xu%2C+D">Dan Xu</a>, <a href="/search/eess?searchtype=author&query=Ma%2C+J">Jun Ma</a>, <a href="/search/eess?searchtype=author&query=Ji%2C+Y">Yiding Ji</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2405.00316v2-abstract-short" style="display: inline;"> Recent years have seen a growing research interest in applications of Deep Neural Networks (DNN) on autonomous vehicle technology. The trend started with perception and prediction a few years ago and it is gradually being applied to motion planning tasks. Despite the performance of networks improve over time, DNN planners inherit the natural drawbacks of Deep Learning. 
Learning-based planners have… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2405.00316v2-abstract-full').style.display = 'inline'; document.getElementById('2405.00316v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2405.00316v2-abstract-full" style="display: none;"> Recent years have seen a growing research interest in applications of Deep Neural Networks (DNN) on autonomous vehicle technology. The trend started with perception and prediction a few years ago and it is gradually being applied to motion planning tasks. Despite the performance of networks improve over time, DNN planners inherit the natural drawbacks of Deep Learning. Learning-based planners have limitations in achieving perfect accuracy on the training dataset and network performance can be affected by out-of-distribution problem. In this paper, we propose FusionAssurance, a novel trajectory-based end-to-end driving fusion framework which combines physics-informed control for safety assurance. By incorporating Potential Field into Model Predictive Control, FusionAssurance is capable of navigating through scenarios that are not included in the training dataset and scenarios where neural network fail to generalize. The effectiveness of the approach is demonstrated by extensive experiments under various scenarios on the CARLA benchmark. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2405.00316v2-abstract-full').style.display = 'none'; document.getElementById('2405.00316v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 5 May, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 1 May, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> May 2024. 
</p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2404.18705">arXiv:2404.18705</a> <span> [<a href="https://arxiv.org/pdf/2404.18705">pdf</a>, <a href="https://arxiv.org/format/2404.18705">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Information Theory">cs.IT</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Signal Processing">eess.SP</span> </div> </div> <p class="title is-5 mathjax"> Wireless Information and Energy Transfer in the Era of 6G Communications </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&query=Psomas%2C+C">Constantinos Psomas</a>, <a href="/search/eess?searchtype=author&query=Ntougias%2C+K">Konstantinos Ntougias</a>, <a href="/search/eess?searchtype=author&query=Shanin%2C+N">Nikita Shanin</a>, <a href="/search/eess?searchtype=author&query=Xu%2C+D">Dongfang Xu</a>, <a href="/search/eess?searchtype=author&query=Mayer%2C+K+M">Kenneth MacSporran Mayer</a>, <a href="/search/eess?searchtype=author&query=Tran%2C+N+M">Nguyen Minh Tran</a>, <a href="/search/eess?searchtype=author&query=Cottatellucci%2C+L">Laura Cottatellucci</a>, <a href="/search/eess?searchtype=author&query=Choi%2C+K+W">Kae Won Choi</a>, <a href="/search/eess?searchtype=author&query=Kim%2C+D+I">Dong In Kim</a>, <a href="/search/eess?searchtype=author&query=Schober%2C+R">Robert Schober</a>, <a href="/search/eess?searchtype=author&query=Krikidis%2C+I">Ioannis Krikidis</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2404.18705v2-abstract-short" style="display: inline;"> Wireless information and energy transfer (WIET) represents an emerging paradigm which employs controllable transmission of radio-frequency signals for 
the dual purpose of data communication and wireless charging. As such, WIET is widely regarded as an enabler of envisioned 6G use cases that rely on energy-sustainable Internet-of-Things (IoT) networks, such as smart cities and smart grids. Meeting… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2404.18705v2-abstract-full').style.display = 'inline'; document.getElementById('2404.18705v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2404.18705v2-abstract-full" style="display: none;"> Wireless information and energy transfer (WIET) represents an emerging paradigm which employs controllable transmission of radio-frequency signals for the dual purpose of data communication and wireless charging. As such, WIET is widely regarded as an enabler of envisioned 6G use cases that rely on energy-sustainable Internet-of-Things (IoT) networks, such as smart cities and smart grids. Meeting the quality-of-service demands of WIET, in terms of both data transfer and power delivery, requires effective co-design of the information and energy signals. In this article, we present the main principles and design aspects of WIET, focusing on its integration in 6G networks. First, we discuss how conventional communication notions such as resource allocation and waveform design need to be revisited in the context of WIET. Next, we consider various candidate 6G technologies that can boost WIET efficiency, namely, holographic multiple-input multiple-output, near-field beamforming, terahertz communication, intelligent reflecting surfaces (IRSs), and reconfigurable (fluid) antenna arrays. We introduce respective WIET design methods, analyze the promising performance gains of these WIET systems, and discuss challenges, open issues, and future research directions. 
Finally, a near-field energy beamforming scheme and a power-based IRS beamforming algorithm are experimentally validated using a wireless energy transfer testbed. The vision of WIET in communication systems has been gaining momentum in recent years, with constant progress with respect to theoretical but also practical aspects. The comprehensive overview of the state of the art of WIET presented in this paper highlights the potentials of WIET systems as well as their overall benefits in 6G networks. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2404.18705v2-abstract-full').style.display = 'none'; document.getElementById('2404.18705v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 16 May, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 29 April, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> April 2024. 
</p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Proceedings of the IEEE, 36 pages, 33 figures</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2403.16477">arXiv:2403.16477</a> <span> [<a href="https://arxiv.org/pdf/2403.16477">pdf</a>, <a href="https://arxiv.org/format/2403.16477">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Information Theory">cs.IT</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Signal Processing">eess.SP</span> </div> </div> <p class="title is-5 mathjax"> Safeguarding Next Generation Multiple Access Using Physical Layer Security Techniques: A Tutorial </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&query=Lv%2C+L">Lu Lv</a>, <a href="/search/eess?searchtype=author&query=Xu%2C+D">Dongyang Xu</a>, <a href="/search/eess?searchtype=author&query=Hu%2C+R+Q">Rose Qingyang Hu</a>, <a href="/search/eess?searchtype=author&query=Ye%2C+Y">Yinghui Ye</a>, <a href="/search/eess?searchtype=author&query=Yang%2C+L">Long Yang</a>, <a href="/search/eess?searchtype=author&query=Lei%2C+X">Xianfu Lei</a>, <a href="/search/eess?searchtype=author&query=Wang%2C+X">Xianbin Wang</a>, <a href="/search/eess?searchtype=author&query=Kim%2C+D+I">Dong In Kim</a>, <a href="/search/eess?searchtype=author&query=Nallanathan%2C+A">Arumugam Nallanathan</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2403.16477v3-abstract-short" style="display: inline;"> Driven by the ever-increasing requirements of ultra-high spectral efficiency, ultra-low latency, and massive connectivity, the forefront of 
wireless research calls for the design of advanced next generation multiple access schemes to facilitate provisioning of these stringent demands. This inspires the embrace of non-orthogonal multiple access (NOMA) in future wireless communication networks. Neve… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2403.16477v3-abstract-full').style.display = 'inline'; document.getElementById('2403.16477v3-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2403.16477v3-abstract-full" style="display: none;"> Driven by the ever-increasing requirements of ultra-high spectral efficiency, ultra-low latency, and massive connectivity, the forefront of wireless research calls for the design of advanced next generation multiple access schemes to facilitate provisioning of these stringent demands. This inspires the embrace of non-orthogonal multiple access (NOMA) in future wireless communication networks. Nevertheless, the support of massive access via NOMA leads to additional security threats, due to the open nature of the air interface, the broadcast characteristic of radio propagation as well as intertwined relationship among paired NOMA users. To address this specific challenge, the superimposed transmission of NOMA can be explored as new opportunities for security aware design, for example, multiuser interference inherent in NOMA can be constructively engineered to benefit communication secrecy and privacy. The purpose of this tutorial is to provide a comprehensive overview on the state-of-the-art physical layer security techniques that guarantee wireless security and privacy for NOMA networks, along with the opportunities, technical challenges, and future research trends. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2403.16477v3-abstract-full').style.display = 'none'; document.getElementById('2403.16477v3-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 21 May, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 25 March, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> March 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Invited paper by Proceedings of the IEEE</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2402.09976">arXiv:2402.09976</a> <span> [<a href="https://arxiv.org/pdf/2402.09976">pdf</a>, <a href="https://arxiv.org/ps/2402.09976">ps</a>, <a href="https://arxiv.org/format/2402.09976">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Signal Processing">eess.SP</span> </div> </div> <p class="title is-5 mathjax"> Sensing-assisted Robust SWIPT for Mobile Energy Harvesting Receivers in Networked ISAC Systems </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&query=Xu%2C+Y">Yiming Xu</a>, <a href="/search/eess?searchtype=author&query=Xu%2C+D">Dongfang Xu</a>, <a href="/search/eess?searchtype=author&query=Song%2C+S">Shenghui Song</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2402.09976v2-abstract-short" style="display: inline;"> Simultaneous wireless information and power transfer (SWIPT) has been proposed to offer communication services and 
transfer power to the energy harvesting receiver (EHR) concurrently. However, existing works mainly focused on static EHRs, without considering the location uncertainty caused by the movement of EHRs and location estimation errors. To tackle this issue, this paper considers the sensin… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2402.09976v2-abstract-full').style.display = 'inline'; document.getElementById('2402.09976v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2402.09976v2-abstract-full" style="display: none;"> Simultaneous wireless information and power transfer (SWIPT) has been proposed to offer communication services and transfer power to the energy harvesting receiver (EHR) concurrently. However, existing works mainly focused on static EHRs, without considering the location uncertainty caused by the movement of EHRs and location estimation errors. To tackle this issue, this paper considers the sensing-assisted SWIPT design in a networked integrated sensing and communication (ISAC) system in the presence of location uncertainty. A two-phase robust design is proposed to reduce the location uncertainty and improve the power transfer efficiency. In particular, each time frame is divided into two phases, i.e., sensing and WPT phases, via time-splitting. The sensing phase performs collaborative sensing to localize the EHR, whose results are then utilized in the WPT phase for efficient WPT. To minimize the power consumption with given communication and power transfer requirements, a two-layer optimization framework is proposed to jointly optimize the time-splitting ratio, coordinated beamforming policy, and sensing node selection. Simulation results validate the effectiveness of the proposed design and demonstrate the existence of an optimal time-splitting ratio for given location uncertainty. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2402.09976v2-abstract-full').style.display = 'none'; document.getElementById('2402.09976v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 30 October, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 15 February, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2402.09974">arXiv:2402.09974</a> <span> [<a href="https://arxiv.org/pdf/2402.09974">pdf</a>, <a href="https://arxiv.org/ps/2402.09974">ps</a>, <a href="https://arxiv.org/format/2402.09974">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Information Theory">cs.IT</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Signal Processing">eess.SP</span> </div> </div> <p class="title is-5 mathjax"> Interference Mitigation for Network-Level ISAC: An Optimization Perspective </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&query=Xu%2C+D">Dongfang Xu</a>, <a href="/search/eess?searchtype=author&query=Xu%2C+Y">Yiming Xu</a>, <a href="/search/eess?searchtype=author&query=Zhang%2C+X">Xin Zhang</a>, <a href="/search/eess?searchtype=author&query=Yu%2C+X">Xianghao Yu</a>, <a href="/search/eess?searchtype=author&query=Song%2C+S">Shenghui Song</a>, <a href="/search/eess?searchtype=author&query=Schober%2C+R">Robert Schober</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2402.09974v1-abstract-short" style="display: 
inline;"> Future wireless networks are envisioned to simultaneously provide high data-rate communication and ubiquitous environment-aware services for numerous users. One promising approach to meet this demand is to employ network-level integrated sensing and communications (ISAC) by jointly designing the signal processing and resource allocation over the entire network. However, to unleash the full potenti… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2402.09974v1-abstract-full').style.display = 'inline'; document.getElementById('2402.09974v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2402.09974v1-abstract-full" style="display: none;"> Future wireless networks are envisioned to simultaneously provide high data-rate communication and ubiquitous environment-aware services for numerous users. One promising approach to meet this demand is to employ network-level integrated sensing and communications (ISAC) by jointly designing the signal processing and resource allocation over the entire network. However, to unleash the full potential of network-level ISAC, some critical challenges must be tackled. Among them, interference management is one of the most significant ones. In this article, we build up a bridge between interference mitigation techniques and the corresponding optimization methods, which facilitates efficient interference mitigation in network-level ISAC systems. In particular, we first identify several types of interference in network-level ISAC systems, including self-interference, mutual interference, crosstalk, clutter, and multiuser interference. Then, we present several promising techniques that can be utilized to suppress specific types of interference. For each type of interference, we discuss the corresponding problem formulation and identify the associated optimization methods. 
Moreover, to illustrate the effectiveness of the proposed interference mitigation techniques, two concrete network-level ISAC systems, namely coordinated cellular network-based and distributed antenna-based ISAC systems, are investigated from interference management perspective. Experiment results indicate that it is beneficial to collaboratively employ different interference mitigation techniques and leverage the network structure to achieve the full potential of network-level ISAC. Finally, we highlight several promising future research directions for the design of ISAC systems. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2402.09974v1-abstract-full').style.display = 'none'; document.getElementById('2402.09974v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 15 February, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2024. 
</p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">7 pages, 6 figures, and the relevant simulation code can be found at https://dongfang-xu.github.io/homepage/code/Two_cases.zip</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2402.09463">arXiv:2402.09463</a> <span> [<a href="https://arxiv.org/pdf/2402.09463">pdf</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Image and Video Processing">eess.IV</span> </div> <div class="is-inline-block" style="margin-left: 0.5rem"> <div class="tags has-addons"> <span class="tag is-dark is-size-7">doi</span> <span class="tag is-light is-size-7"><a class="" href="https://doi.org/10.1109/TMI.2024.3485554">10.1109/TMI.2024.3485554 <i class="fa fa-external-link" aria-hidden="true"></i></a></span> </div> </div> </div> <p class="title is-5 mathjax"> Multi-Center Fetal Brain Tissue Annotation (FeTA) Challenge 2022 Results </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&query=Payette%2C+K">Kelly Payette</a>, <a href="/search/eess?searchtype=author&query=Steger%2C+C">Céline Steger</a>, <a href="/search/eess?searchtype=author&query=Licandro%2C+R">Roxane Licandro</a>, <a href="/search/eess?searchtype=author&query=de+Dumast%2C+P">Priscille de Dumast</a>, <a href="/search/eess?searchtype=author&query=Li%2C+H+B">Hongwei Bran Li</a>, <a href="/search/eess?searchtype=author&query=Barkovich%2C+M">Matthew Barkovich</a>, <a href="/search/eess?searchtype=author&query=Li%2C+L">Liu Li</a>, <a href="/search/eess?searchtype=author&query=Dannecker%2C+M">Maik Dannecker</a>, <a href="/search/eess?searchtype=author&query=Chen%2C+C">Chen Chen</a>, <a href="/search/eess?searchtype=author&query=Ouyang%2C+C">Cheng Ouyang</a>, <a 
href="/search/eess?searchtype=author&query=McConnell%2C+N">Niccolò McConnell</a>, <a href="/search/eess?searchtype=author&query=Miron%2C+A">Alina Miron</a>, <a href="/search/eess?searchtype=author&query=Li%2C+Y">Yongmin Li</a>, <a href="/search/eess?searchtype=author&query=Uus%2C+A">Alena Uus</a>, <a href="/search/eess?searchtype=author&query=Grigorescu%2C+I">Irina Grigorescu</a>, <a href="/search/eess?searchtype=author&query=Gilliland%2C+P+R">Paula Ramirez Gilliland</a>, <a href="/search/eess?searchtype=author&query=Siddiquee%2C+M+M+R">Md Mahfuzur Rahman Siddiquee</a>, <a href="/search/eess?searchtype=author&query=Xu%2C+D">Daguang Xu</a>, <a href="/search/eess?searchtype=author&query=Myronenko%2C+A">Andriy Myronenko</a>, <a href="/search/eess?searchtype=author&query=Wang%2C+H">Haoyu Wang</a>, <a href="/search/eess?searchtype=author&query=Huang%2C+Z">Ziyan Huang</a>, <a href="/search/eess?searchtype=author&query=Ye%2C+J">Jin Ye</a>, <a href="/search/eess?searchtype=author&query=Aleny%C3%A0%2C+M">Mireia Alenyà</a>, <a href="/search/eess?searchtype=author&query=Comte%2C+V">Valentin Comte</a>, <a href="/search/eess?searchtype=author&query=Camara%2C+O">Oscar Camara</a> , et al. (42 additional authors not shown) </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2402.09463v1-abstract-short" style="display: inline;"> Segmentation is a critical step in analyzing the developing human fetal brain. There have been vast improvements in automatic segmentation methods in the past several years, and the Fetal Brain Tissue Annotation (FeTA) Challenge 2021 helped to establish an excellent standard of fetal brain segmentation. 
However, FeTA 2021 was a single center study, and the generalizability of algorithms across dif… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2402.09463v1-abstract-full').style.display = 'inline'; document.getElementById('2402.09463v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2402.09463v1-abstract-full" style="display: none;"> Segmentation is a critical step in analyzing the developing human fetal brain. There have been vast improvements in automatic segmentation methods in the past several years, and the Fetal Brain Tissue Annotation (FeTA) Challenge 2021 helped to establish an excellent standard of fetal brain segmentation. However, FeTA 2021 was a single center study, and the generalizability of algorithms across different imaging centers remains unsolved, limiting real-world clinical applicability. The multi-center FeTA Challenge 2022 focuses on advancing the generalizability of fetal brain segmentation algorithms for magnetic resonance imaging (MRI). In FeTA 2022, the training dataset contained images and corresponding manually annotated multi-class labels from two imaging centers, and the testing data contained images from these two imaging centers as well as two additional unseen centers. The data from different centers varied in many aspects, including scanners used, imaging parameters, and fetal brain super-resolution algorithms applied. 16 teams participated in the challenge, and 17 algorithms were evaluated. Here, a detailed overview and analysis of the challenge results are provided, focusing on the generalizability of the submissions. Both in- and out of domain, the white matter and ventricles were segmented with the highest accuracy, while the most challenging structure remains the cerebral cortex due to anatomical complexity. 
The FeTA Challenge 2022 was able to successfully evaluate and advance generalizability of multi-class fetal brain tissue segmentation algorithms for MRI and it continues to benchmark new algorithms. The resulting new methods contribute to improving the analysis of brain development in utero. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2402.09463v1-abstract-full').style.display = 'none'; document.getElementById('2402.09463v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 8 February, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Results from FeTA Challenge 2022, held at MICCAI; Manuscript submitted to IEEE Transactions on Medical Imaging (2024). 
Supplementary Info (including submission methods descriptions) available here: https://zenodo.org/records/10628648</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2401.02678">arXiv:2401.02678</a> <span> [<a href="https://arxiv.org/pdf/2401.02678">pdf</a>, <a href="https://arxiv.org/format/2401.02678">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Sound">cs.SD</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Multimedia">cs.MM</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Audio and Speech Processing">eess.AS</span> </div> </div> <p class="title is-5 mathjax"> MusicAOG: an Energy-Based Model for Learning and Sampling a Hierarchical Representation of Symbolic Music </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&query=Qian%2C+Y">Yikai Qian</a>, <a href="/search/eess?searchtype=author&query=Wang%2C+T">Tianle Wang</a>, <a href="/search/eess?searchtype=author&query=Tong%2C+X">Xinyi Tong</a>, <a href="/search/eess?searchtype=author&query=Jin%2C+X">Xin Jin</a>, <a href="/search/eess?searchtype=author&query=Xu%2C+D">Duo Xu</a>, <a href="/search/eess?searchtype=author&query=Zheng%2C+B">Bo Zheng</a>, <a href="/search/eess?searchtype=author&query=Ge%2C+T">Tiezheng Ge</a>, <a href="/search/eess?searchtype=author&query=Yu%2C+F">Feng Yu</a>, <a href="/search/eess?searchtype=author&query=Zhu%2C+S">Song-Chun Zhu</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2401.02678v1-abstract-short" style="display: inline;"> In addressing the challenge of interpretability and generalizability of artificial music intelligence, this paper introduces a novel symbolic representation 
that amalgamates both explicit and implicit musical information across diverse traditions and granularities. Utilizing a hierarchical and-or graph representation, the model employs nodes and edges to encapsulate a broad spectrum of musical ele… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2401.02678v1-abstract-full').style.display = 'inline'; document.getElementById('2401.02678v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2401.02678v1-abstract-full" style="display: none;"> In addressing the challenge of interpretability and generalizability of artificial music intelligence, this paper introduces a novel symbolic representation that amalgamates both explicit and implicit musical information across diverse traditions and granularities. Utilizing a hierarchical and-or graph representation, the model employs nodes and edges to encapsulate a broad spectrum of musical elements, including structures, textures, rhythms, and harmonies. This hierarchical approach expands the representability across various scales of music. This representation serves as the foundation for an energy-based model, uniquely tailored to learn musical concepts through a flexible algorithm framework relying on the minimax entropy principle. Utilizing an adapted Metropolis-Hastings sampling technique, the model enables fine-grained control over music generation. A comprehensive empirical evaluation, contrasting this novel approach with existing methodologies, manifests considerable advancements in interpretability and controllability. This study marks a substantial contribution to the fields of music analysis, composition, and computational musicology. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2401.02678v1-abstract-full').style.display = 'none'; document.getElementById('2401.02678v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 5 January, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> January 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2401.02192">arXiv:2401.02192</a> <span> [<a href="https://arxiv.org/pdf/2401.02192">pdf</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Image and Video Processing">eess.IV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> Nodule detection and generation on chest X-rays: NODE21 Challenge </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&query=Sogancioglu%2C+E">Ecem Sogancioglu</a>, <a href="/search/eess?searchtype=author&query=van+Ginneken%2C+B">Bram van Ginneken</a>, <a href="/search/eess?searchtype=author&query=Behrendt%2C+F">Finn Behrendt</a>, <a href="/search/eess?searchtype=author&query=Bengs%2C+M">Marcel Bengs</a>, <a href="/search/eess?searchtype=author&query=Schlaefer%2C+A">Alexander Schlaefer</a>, <a href="/search/eess?searchtype=author&query=Radu%2C+M">Miron Radu</a>, <a href="/search/eess?searchtype=author&query=Xu%2C+D">Di Xu</a>, <a href="/search/eess?searchtype=author&query=Sheng%2C+K">Ke Sheng</a>, <a href="/search/eess?searchtype=author&query=Scalzo%2C+F">Fabien Scalzo</a>, <a 
href="/search/eess?searchtype=author&query=Marcus%2C+E">Eric Marcus</a>, <a href="/search/eess?searchtype=author&query=Papa%2C+S">Samuele Papa</a>, <a href="/search/eess?searchtype=author&query=Teuwen%2C+J">Jonas Teuwen</a>, <a href="/search/eess?searchtype=author&query=Scholten%2C+E+T">Ernst Th. Scholten</a>, <a href="/search/eess?searchtype=author&query=Schalekamp%2C+S">Steven Schalekamp</a>, <a href="/search/eess?searchtype=author&query=Hendrix%2C+N">Nils Hendrix</a>, <a href="/search/eess?searchtype=author&query=Jacobs%2C+C">Colin Jacobs</a>, <a href="/search/eess?searchtype=author&query=Hendrix%2C+W">Ward Hendrix</a>, <a href="/search/eess?searchtype=author&query=S%C3%A1nchez%2C+C+I">Clara I Sánchez</a>, <a href="/search/eess?searchtype=author&query=Murphy%2C+K">Keelin Murphy</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2401.02192v1-abstract-short" style="display: inline;"> Pulmonary nodules may be an early manifestation of lung cancer, the leading cause of cancer-related deaths among both men and women. Numerous studies have established that deep learning methods can yield high-performance levels in the detection of lung nodules in chest X-rays. However, the lack of gold-standard public datasets slows down the progression of the research and prevents benchmarking of… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2401.02192v1-abstract-full').style.display = 'inline'; document.getElementById('2401.02192v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2401.02192v1-abstract-full" style="display: none;"> Pulmonary nodules may be an early manifestation of lung cancer, the leading cause of cancer-related deaths among both men and women. 
Numerous studies have established that deep learning methods can yield high-performance levels in the detection of lung nodules in chest X-rays. However, the lack of gold-standard public datasets slows down the progression of the research and prevents benchmarking of methods for this task. To address this, we organized a public research challenge, NODE21, aimed at the detection and generation of lung nodules in chest X-rays. While the detection track assesses state-of-the-art nodule detection systems, the generation track determines the utility of nodule generation algorithms to augment training data and hence improve the performance of the detection systems. This paper summarizes the results of the NODE21 challenge and performs extensive additional experiments to examine the impact of the synthetically generated nodule training images on the detection algorithm performance. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2401.02192v1-abstract-full').style.display = 'none'; document.getElementById('2401.02192v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 4 January, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> January 2024. 
</p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">15 pages, 5 figures</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2311.16771">arXiv:2311.16771</a> <span> [<a href="https://arxiv.org/pdf/2311.16771">pdf</a>, <a href="https://arxiv.org/format/2311.16771">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">stat.ML</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Signal Processing">eess.SP</span> </div> </div> <p class="title is-5 mathjax"> The HR-Calculus: Enabling Information Processing with Quaternion Algebra </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&query=Mandic%2C+D+P">Danilo P. 
Mandic</a>, <a href="/search/eess?searchtype=author&query=Talebi%2C+S+P">Sayed Pouria Talebi</a>, <a href="/search/eess?searchtype=author&query=Took%2C+C+C">Clive Cheong Took</a>, <a href="/search/eess?searchtype=author&query=Xia%2C+Y">Yili Xia</a>, <a href="/search/eess?searchtype=author&query=Xu%2C+D">Dongpo Xu</a>, <a href="/search/eess?searchtype=author&query=Xiang%2C+M">Min Xiang</a>, <a href="/search/eess?searchtype=author&query=Bourigault%2C+P">Pauline Bourigault</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2311.16771v2-abstract-short" style="display: inline;"> From their inception, quaternions and their division algebra have proven to be advantageous in modelling rotation/orientation in three-dimensional spaces and have seen use from the initial formulation of electromagnetic field theory through to forming the basis of quantum field theory. Despite their impressive versatility in modelling real-world phenomena, adaptive information processing technique… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2311.16771v2-abstract-full').style.display = 'inline'; document.getElementById('2311.16771v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2311.16771v2-abstract-full" style="display: none;"> From their inception, quaternions and their division algebra have proven to be advantageous in modelling rotation/orientation in three-dimensional spaces and have seen use from the initial formulation of electromagnetic field theory through to forming the basis of quantum field theory. 
Despite their impressive versatility in modelling real-world phenomena, adaptive information processing techniques specifically designed for quaternion-valued signals have only recently come to the attention of the machine learning, signal processing, and control communities. The most important development in this direction is introduction of the HR-calculus, which provides the required mathematical foundation for deriving adaptive information processing techniques directly in the quaternion domain. In this article, the foundations of the HR-calculus are revised and the required tools for deriving adaptive learning techniques suitable for dealing with quaternion-valued signals, such as the gradient operator, chain and product derivative rules, and Taylor series expansion are presented. This serves to establish the most important applications of adaptive information processing in the quaternion domain for both single-node and multi-node formulations. The article is supported by Supplementary Material, which will be referred to as SM. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2311.16771v2-abstract-full').style.display = 'none'; document.getElementById('2311.16771v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 26 October, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 28 November, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> November 2023. 
</p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2311.08829">arXiv:2311.08829</a> <span> [<a href="https://arxiv.org/pdf/2311.08829">pdf</a>, <a href="https://arxiv.org/format/2311.08829">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Sound">cs.SD</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Audio and Speech Processing">eess.AS</span> </div> </div> <p class="title is-5 mathjax"> Autoencoder with Group-based Decoder and Multi-task Optimization for Anomalous Sound Detection </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&query=Zhou%2C+Y">Yifan Zhou</a>, <a href="/search/eess?searchtype=author&query=Xu%2C+D">Dongxing Xu</a>, <a href="/search/eess?searchtype=author&query=Wei%2C+H">Haoran Wei</a>, <a href="/search/eess?searchtype=author&query=Long%2C+Y">Yanhua Long</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2311.08829v1-abstract-short" style="display: inline;"> In industry, machine anomalous sound detection (ASD) is in great demand. However, collecting enough abnormal samples is difficult due to the high cost, which boosts the rapid development of unsupervised ASD algorithms. 
Autoencoder (AE) based methods have been widely used for unsupervised ASD, but suffer from problems including 'shortcut', poor anti-noise ability and sub-optimal quality of features… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2311.08829v1-abstract-full').style.display = 'inline'; document.getElementById('2311.08829v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2311.08829v1-abstract-full" style="display: none;"> In industry, machine anomalous sound detection (ASD) is in great demand. However, collecting enough abnormal samples is difficult due to the high cost, which boosts the rapid development of unsupervised ASD algorithms. Autoencoder (AE) based methods have been widely used for unsupervised ASD, but suffer from problems including 'shortcut', poor anti-noise ability and sub-optimal quality of features. To address these challenges, we propose a new AE-based framework termed AEGM. Specifically, we first insert an auxiliary classifier into AE to enhance ASD in a multi-task learning manner. Then, we design a group-based decoder structure, accompanied by an adaptive loss function, to endow the model with domain-specific knowledge. Results on the DCASE 2021 Task 2 development set show that our methods achieve a relative improvement of 13.11% and 15.20% respectively in average AUC over the official AE and MobileNetV2 across test sets of seven machines. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2311.08829v1-abstract-full').style.display = 'none'; document.getElementById('2311.08829v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 15 November, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> November 2023. 
</p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Submitted to the 2024 IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP 2024)</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2311.02376">arXiv:2311.02376</a> <span> [<a href="https://arxiv.org/pdf/2311.02376">pdf</a>, <a href="https://arxiv.org/ps/2311.02376">ps</a>, <a href="https://arxiv.org/format/2311.02376">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Information Theory">cs.IT</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Signal Processing">eess.SP</span> </div> </div> <p class="title is-5 mathjax"> Intelligent Reflecting Surface-Aided Wireless Communication with Movable Elements </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&query=Hu%2C+G">Guojie Hu</a>, <a href="/search/eess?searchtype=author&query=Wu%2C+Q">Qingqing Wu</a>, <a href="/search/eess?searchtype=author&query=Xu%2C+D">Dognhui Xu</a>, <a href="/search/eess?searchtype=author&query=Xu%2C+K">Kui Xu</a>, <a href="/search/eess?searchtype=author&query=Si%2C+J">Jiangbo Si</a>, <a href="/search/eess?searchtype=author&query=Cai%2C+Y">Yunlong Cai</a>, <a href="/search/eess?searchtype=author&query=Al-Dhahir%2C+N">Naofal Al-Dhahir</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2311.02376v1-abstract-short" style="display: inline;"> Intelligent reflecting surface (IRS) has been recognized as a powerful technology for boosting communication performance. 
To reduce manufacturing and control costs, it is preferable to consider discrete phase shifts (DPSs) for IRS, which are set by default as uniformly distributed in the range of $[ - π,π)$ in the literature. Such setting, however, cannot achieve a desirable performance over the g… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2311.02376v1-abstract-full').style.display = 'inline'; document.getElementById('2311.02376v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2311.02376v1-abstract-full" style="display: none;"> Intelligent reflecting surface (IRS) has been recognized as a powerful technology for boosting communication performance. To reduce manufacturing and control costs, it is preferable to consider discrete phase shifts (DPSs) for IRS, which are set by default as uniformly distributed in the range of $[ - π,π)$ in the literature. Such setting, however, cannot achieve a desirable performance over the general Rician fading where the channel phase concentrates in a narrow range with a higher probability. Motivated by this drawback, we in this paper design optimal non-uniform DPSs for IRS to achieve a desirable performance level. The fundamental challenge is the \textit{possible offset in phase distribution across different cascaded source-element-destination channels}, if adopting conventional IRS where the position of each element is fixed. Such phenomenon leads to different patterns of optimal non-uniform DPSs for each IRS element and thus causes huge manufacturing costs especially when the number of IRS elements is large. Driven by the recently emerging fluid antenna system (or movable antenna technology), we demonstrate that if the position of each IRS element can be flexibly adjusted, the above phase distribution offset can be surprisingly eliminated, leading to the same pattern of DPSs for each IRS element. 
Armed with this, we then determine the form of unified non-uniform DPSs based on a low-complexity iterative algorithm. Simulations show that our proposed design significantly improves the system performance compared to competitive benchmarks. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2311.02376v1-abstract-full').style.display = 'none'; document.getElementById('2311.02376v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 4 November, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> November 2023. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2311.00188">arXiv:2311.00188</a> <span> [<a href="https://arxiv.org/pdf/2311.00188">pdf</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Image and Video Processing">eess.IV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Medical Physics">physics.med-ph</span> </div> </div> <p class="title is-5 mathjax"> A Two-Step Framework for Multi-Material Decomposition of Dual Energy Computed Tomography from Projection Domain </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&query=Xu%2C+D">Di Xu</a>, <a href="/search/eess?searchtype=author&query=Lyu%2C+Q">Qihui Lyu</a>, <a href="/search/eess?searchtype=author&query=Ruan%2C+D">Dan Ruan</a>, <a href="/search/eess?searchtype=author&query=Sheng%2C+K">Ke Sheng</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2311.00188v1-abstract-short" style="display: inline;"> Dual-energy computed tomography (DECT) utilizes separate X-ray energy 
spectra to improve multi-material decomposition (MMD) for various diagnostic applications. However accurate decomposing more than two types of material remains challenging using conventional methods. Deep learning (DL) methods have shown promise to improve the MMD performance, but typical approaches of conducing DL-MMD in the im… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2311.00188v1-abstract-full').style.display = 'inline'; document.getElementById('2311.00188v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2311.00188v1-abstract-full" style="display: none;"> Dual-energy computed tomography (DECT) utilizes separate X-ray energy spectra to improve multi-material decomposition (MMD) for various diagnostic applications. However accurate decomposing more than two types of material remains challenging using conventional methods. Deep learning (DL) methods have shown promise to improve the MMD performance, but typical approaches of conducing DL-MMD in the image domain fail to fully utilize projection information or under iterative setup are computationally inefficient in both training and prediction. In this work, we present a clinical-applicable MMD (>2) framework rFast-MMDNet, operating with raw projection data in non-recursive setup, for breast tissue differentiation. rFast-MMDNet is a two-stage algorithm, including stage-one SinoNet to perform dual energy projection decomposition on tissue sinograms and stage-two FBP-DenoiseNet to perform domain adaptation and image post-processing. rFast-MMDNet was tested on a 2022 DL-Spectral-Challenge breast phantom dataset. The two stages of rFast-MMDNet were evaluated separately and then compared with four noniterative reference methods including a direct inversion method (AA-MMD), an image domain DL method (ID-UNet), AA-MMD/ID-UNet + DenoiseNet and a sinogram domain DL method (Triple-CBCT). 
Our results show that models trained from information stored in DE transmission domain can yield high-fidelity decomposition of the adipose, calcification, and fibroglandular materials with averaged RMSE, MAE, negative PSNR, and SSIM of 0.004+/-~0, 0.001+/-~0, -45.027+/-~0.542, and 0.002+/-~0 benchmarking to the ground truth, respectively. Training of entire rFast-MMDNet on a 4xRTX A6000 GPU cluster took a day with inference time <1s. All DL methods generally led to more accurate MMD than AA-MMD. rFast-MMDNet outperformed Triple-CBCT, but both are superior to the image-domain based methods. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2311.00188v1-abstract-full').style.display = 'none'; document.getElementById('2311.00188v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 31 October, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> November 2023. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">AAPM 2023 Dl-spectral Challenge Summary</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2310.04114">arXiv:2310.04114</a> <span> [<a href="https://arxiv.org/pdf/2310.04114">pdf</a>, <a href="https://arxiv.org/format/2310.04114">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Image and Video Processing">eess.IV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> Aorta Segmentation from 3D CT in MICCAI SEG.A. 
2023 Challenge </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&query=Myronenko%2C+A">Andriy Myronenko</a>, <a href="/search/eess?searchtype=author&query=Yang%2C+D">Dong Yang</a>, <a href="/search/eess?searchtype=author&query=He%2C+Y">Yufan He</a>, <a href="/search/eess?searchtype=author&query=Xu%2C+D">Daguang Xu</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2310.04114v1-abstract-short" style="display: inline;"> Aorta provides the main blood supply of the body. Screening of aorta with imaging helps for early aortic disease detection and monitoring. In this work, we describe our solution to the Segmentation of the Aorta (SEG.A.231) from 3D CT challenge. We use automated segmentation method Auto3DSeg available in MONAI. Our solution achieves an average Dice score of 0.920 and 95th percentile of the Hausdorf… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2310.04114v1-abstract-full').style.display = 'inline'; document.getElementById('2310.04114v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2310.04114v1-abstract-full" style="display: none;"> Aorta provides the main blood supply of the body. Screening of aorta with imaging helps for early aortic disease detection and monitoring. In this work, we describe our solution to the Segmentation of the Aorta (SEG.A.231) from 3D CT challenge. We use automated segmentation method Auto3DSeg available in MONAI. Our solution achieves an average Dice score of 0.920 and 95th percentile of the Hausdorff Distance (HD95) of 6.013, which ranks first and wins the SEG.A. 2023 challenge. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2310.04114v1-abstract-full').style.display = 'none'; document.getElementById('2310.04114v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 6 October, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2023. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">MICCAI 2023, SEG.A. 2023 challenge 1st place</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2309.10227">arXiv:2309.10227</a> <span> [<a href="https://arxiv.org/pdf/2309.10227">pdf</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Image and Video Processing">eess.IV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> Learning Dynamic MRI Reconstruction with Convolutional Network Assisted Reconstruction Swin Transformer </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&query=Xu%2C+D">Di Xu</a>, <a href="/search/eess?searchtype=author&query=Liu%2C+H">Hengjie Liu</a>, <a href="/search/eess?searchtype=author&query=Ruan%2C+D">Dan Ruan</a>, <a href="/search/eess?searchtype=author&query=Sheng%2C+K">Ke Sheng</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2309.10227v1-abstract-short" style="display: inline;"> Dynamic magnetic resonance imaging (DMRI) is an effective imaging tool for diagnosis tasks that require 
motion tracking of a certain anatomy. To speed up DMRI acquisition, k-space measurements are commonly undersampled along spatial or spatial-temporal domains. The difficulty of recovering useful information increases with increasing undersampling ratios. Compress sensing was invented for this pur… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2309.10227v1-abstract-full').style.display = 'inline'; document.getElementById('2309.10227v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2309.10227v1-abstract-full" style="display: none;"> Dynamic magnetic resonance imaging (DMRI) is an effective imaging tool for diagnosis tasks that require motion tracking of a certain anatomy. To speed up DMRI acquisition, k-space measurements are commonly undersampled along spatial or spatial-temporal domains. The difficulty of recovering useful information increases with increasing undersampling ratios. Compress sensing was invented for this purpose and has become the most popular method until deep learning (DL) based DMRI reconstruction methods emerged in the past decade. Nevertheless, existing DL networks are still limited in long-range sequential dependency understanding and computational efficiency and are not fully automated. Considering the success of Transformers positional embedding and "swin window" self-attention mechanism in the vision community, especially natural video understanding, we hereby propose a novel architecture named Reconstruction Swin Transformer (RST) for 4D MRI. RST inherits the backbone design of the Video Swin Transformer with a novel reconstruction head introduced to restore pixel-wise intensity. A convolution network called SADXNet is used for rapid initialization of 2D MR frames before RST learning to effectively reduce the model complexity, GPU hardware demand, and training time. 
Experimental results in the cardiac 4D MR dataset further substantiate the superiority of RST, achieving the lowest RMSE of 0.0286 +/- 0.0199 and 1 - SSIM of 0.0872 +/- 0.0783 on 9 times accelerated validation sequences. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2309.10227v1-abstract-full').style.display = 'none'; document.getElementById('2309.10227v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 18 September, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> September 2023. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">MICCAI 2023 Workshop</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2309.02171">arXiv:2309.02171</a> <span> [<a href="https://arxiv.org/pdf/2309.02171">pdf</a>, <a href="https://arxiv.org/format/2309.02171">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Information Theory">cs.IT</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Signal Processing">eess.SP</span> </div> </div> <p class="title is-5 mathjax"> A Wideband MIMO Channel Model for Aerial Intelligent Reflecting Surface-Assisted Wireless Communications </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&query=Liu%2C+S">Shaoyi Liu</a>, <a href="/search/eess?searchtype=author&query=Ma%2C+N">Nan Ma</a>, <a href="/search/eess?searchtype=author&query=Chen%2C+Y">Yaning Chen</a>, <a href="/search/eess?searchtype=author&query=Peng%2C+K">Ke Peng</a>, <a href="/search/eess?searchtype=author&query=Xue%2C+D">Dongsheng Xue</a> </p> <p 
class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2309.02171v1-abstract-short" style="display: inline;"> Compared to traditional intelligent reflecting surfaces(IRS), aerial IRS (AIRS) has unique advantages, such as more flexible deployment and wider service coverage. However, modeling AIRS in the channel presents new challenges due to their mobility. In this paper, a three-dimensional (3D) wideband channel model for AIRS and IRS joint-assisted multiple-input multiple-output (MIMO) communication syst… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2309.02171v1-abstract-full').style.display = 'inline'; document.getElementById('2309.02171v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2309.02171v1-abstract-full" style="display: none;"> Compared to traditional intelligent reflecting surfaces(IRS), aerial IRS (AIRS) has unique advantages, such as more flexible deployment and wider service coverage. However, modeling AIRS in the channel presents new challenges due to their mobility. In this paper, a three-dimensional (3D) wideband channel model for AIRS and IRS joint-assisted multiple-input multiple-output (MIMO) communication system is proposed, where considering the rotational degrees of freedom in three directions and the motion angles of AIRS in space. Based on the proposed model, the channel impulse response (CIR), correlation function, and channel capacity are derived, and several feasible joint phase shifts schemes for AIRS and IRS units are proposed. Simulation results show that the proposed model can capture the channel characteristics accurately, and the proposed phase shifts methods can effectively improve the channel statistical characteristics and increase the system capacity. 
Additionally, we observe that in certain scenarios, the paths involving the IRS and the line-of-sight (LoS) paths exhibit similar characteristics. These findings provide valuable insights for the future development of intelligent communication systems. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2309.02171v1-abstract-full').style.display = 'none'; document.getElementById('2309.02171v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 5 September, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> September 2023. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">6 pages, 7 figures</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2308.15696">arXiv:2308.15696</a> <span> [<a href="https://arxiv.org/pdf/2308.15696">pdf</a>, <a href="https://arxiv.org/format/2308.15696">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Signal Processing">eess.SP</span> </div> </div> <p class="title is-5 mathjax"> Implementation and Evaluation of Physical Layer Key Generation on SDR based LoRa Platform </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&query=Hu%2C+Y">Yingying Hu</a>, <a href="/search/eess?searchtype=author&query=Xu%2C+D">Dongyang Xu</a>, <a href="/search/eess?searchtype=author&query=Zhang%2C+T">Tiantian Zhang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2308.15696v1-abstract-short" style="display: inline;"> Physical layer key 
generation technology which leverages channel randomness to generate secret keys has attracted extensive attentions in long range (LoRa)-based networks recently. We in this paper develop a software-defined radio (SDR) based LoRa communications platform using GNU Radio on universal software radio peripheral (USRP) to implement and evaluate typical physical layer key generation sc… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2308.15696v1-abstract-full').style.display = 'inline'; document.getElementById('2308.15696v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2308.15696v1-abstract-full" style="display: none;"> Physical layer key generation technology which leverages channel randomness to generate secret keys has attracted extensive attentions in long range (LoRa)-based networks recently. We in this paper develop a software-defined radio (SDR) based LoRa communications platform using GNU Radio on universal software radio peripheral (USRP) to implement and evaluate typical physical layer key generation schemes. Thanks to the flexibility and configurability of GNU Radio to extract LoRa packets, we are able to obtain the fine-grained channel frequency response (CFR) through LoRa preamble based channel estimation for key generation. Besides, we propose a lowcomplexity preprocessing method to enhance the randomness of quantization while reducing the secret key disagreement ratio. The results indicate that we can achieve 367 key bits with a high level of randomness through just a single effective channel probing in an indoor environment at a distance of 2 meters under the circumstance of a spreading factor (SF) of 7, a preamble length of 8, a signal bandwidth of 250 kHz, and a sampling rate of 1 MHz. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2308.15696v1-abstract-full').style.display = 'none'; document.getElementById('2308.15696v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 29 August, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> August 2023. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Submitted to IEEE VTC2023 Fall</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2308.12526">arXiv:2308.12526</a> <span> [<a href="https://arxiv.org/pdf/2308.12526">pdf</a>, <a href="https://arxiv.org/format/2308.12526">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Audio and Speech Processing">eess.AS</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Sound">cs.SD</span> </div> </div> <p class="title is-5 mathjax"> UNISOUND System for VoxCeleb Speaker Recognition Challenge 2023 </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&query=Zheng%2C+Y">Yu Zheng</a>, <a href="/search/eess?searchtype=author&query=Zhang%2C+Y">Yajun Zhang</a>, <a href="/search/eess?searchtype=author&query=Niu%2C+C">Chuanying Niu</a>, <a href="/search/eess?searchtype=author&query=Zhan%2C+Y">Yibin Zhan</a>, <a href="/search/eess?searchtype=author&query=Long%2C+Y">Yanhua Long</a>, <a href="/search/eess?searchtype=author&query=Xu%2C+D">Dongxing Xu</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis 
has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2308.12526v1-abstract-short" style="display: inline;"> This report describes the UNISOUND submission for Track1 and Track2 of VoxCeleb Speaker Recognition Challenge 2023 (VoxSRC 2023). We submit the same system on Track 1 and Track 2, which is trained with only VoxCeleb2-dev. Large-scale ResNet and RepVGG architectures are developed for the challenge. We propose a consistency-aware score calibration method, which leverages the stability of audio voice… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2308.12526v1-abstract-full').style.display = 'inline'; document.getElementById('2308.12526v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2308.12526v1-abstract-full" style="display: none;"> This report describes the UNISOUND submission for Track1 and Track2 of VoxCeleb Speaker Recognition Challenge 2023 (VoxSRC 2023). We submit the same system on Track 1 and Track 2, which is trained with only VoxCeleb2-dev. Large-scale ResNet and RepVGG architectures are developed for the challenge. We propose a consistency-aware score calibration method, which leverages the stability of audio voiceprints in similarity score by a Consistency Measure Factor (CMF). CMF brings a huge performance boost in this challenge. Our final system is a fusion of six models and achieves the first place in Track 1 and second place in Track 2 of VoxSRC 2023. The minDCF of our submission is 0.0855 and the EER is 1.5880%. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2308.12526v1-abstract-full').style.display = 'none'; document.getElementById('2308.12526v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 23 August, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> August 2023. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2308.05760">arXiv:2308.05760</a> <span> [<a href="https://arxiv.org/pdf/2308.05760">pdf</a>, <a href="https://arxiv.org/format/2308.05760">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Signal Processing">eess.SP</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Optics">physics.optics</span> </div> </div> <p class="title is-5 mathjax"> Unified Statistical Channel Modeling and performance analysis of Vertical Underwater Wireless Optical Communication Links considering Turbulence-Induced Fading </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&query=Xu%2C+D">Dongling Xu</a>, <a href="/search/eess?searchtype=author&query=Yi%2C+X">Xiang Yi</a>, <a href="/search/eess?searchtype=author&query=Ata%2C+Y">Yalçın Ata</a>, <a href="/search/eess?searchtype=author&query=Tao%2C+X">Xinyue Tao</a>, <a href="/search/eess?searchtype=author&query=Li%2C+Y">Yuxuan Li</a>, <a href="/search/eess?searchtype=author&query=Yue%2C+P">Peng Yue</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2308.05760v1-abstract-short" style="display: inline;"> The reliability of a vertical underwater wireless optical communication (UWOC) network 
is seriously impacted by turbulence-induced fading due to fluctuations in the water temperature and salinity, which vary with depth. To better assess the vertical UWOC system performances, an accurate probability distribution function (PDF) model that can describe this fading is indispensable. In view of the lim… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2308.05760v1-abstract-full').style.display = 'inline'; document.getElementById('2308.05760v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2308.05760v1-abstract-full" style="display: none;"> The reliability of a vertical underwater wireless optical communication (UWOC) network is seriously impacted by turbulence-induced fading due to fluctuations in the water temperature and salinity, which vary with depth. To better assess the vertical UWOC system performances, an accurate probability distribution function (PDF) model that can describe this fading is indispensable. In view of the limitations of theoretical and experimental studies, this paper is the first to establish a more accurate modeling scheme for wave optics simulation (WOS) by fully considering the constraints of sampling conditions on multi-phase screen parameters. On this basis, we complete the modeling of light propagation in a vertical oceanic turbulence channel and subsequently propose a unified statistical model named mixture Weibull-generalized Gamma (WGG) distribution model to characterize turbulence-induced fading in vertical links. Interestingly, the WGG model is shown to provide a perfect fit with the acquired data under all considered channel conditions. We further show that the application of the WGG model leads to closed-form and analytically tractable expressions for key UWOC system performance metrics such as the average bit-error rate (BER). 
The presented results give valuable insight into the practical aspects of development of UWOC networks. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2308.05760v1-abstract-full').style.display = 'none'; document.getElementById('2308.05760v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 8 August, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> August 2023. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2308.02304">arXiv:2308.02304</a> <span> [<a href="https://arxiv.org/pdf/2308.02304">pdf</a>, <a href="https://arxiv.org/format/2308.02304">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Information Theory">cs.IT</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Signal Processing">eess.SP</span> </div> </div> <p class="title is-5 mathjax"> Movable Antenna-Enhanced Multiuser Communication: Optimal Discrete Antenna Positioning and Beamforming </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&query=Wu%2C+Y">Yifei Wu</a>, <a href="/search/eess?searchtype=author&query=Xu%2C+D">Dongfang Xu</a>, <a href="/search/eess?searchtype=author&query=Ng%2C+D+W+K">Derrick Wing Kwan Ng</a>, <a href="/search/eess?searchtype=author&query=Gerstacker%2C+W">Wolfgang Gerstacker</a>, <a href="/search/eess?searchtype=author&query=Schober%2C+R">Robert Schober</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2308.02304v1-abstract-short" style="display: inline;"> Movable antennas (MAs) are a promising paradigm to enhance the 
spatial degrees of freedom of conventional multi-antenna systems by flexibly adapting the positions of the antenna elements within a given transmit area. In this paper, we model the motion of the MA elements as discrete movements and study the corresponding resource allocation problem for MA-enabled multiuser multiple-input single-outp… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2308.02304v1-abstract-full').style.display = 'inline'; document.getElementById('2308.02304v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2308.02304v1-abstract-full" style="display: none;"> Movable antennas (MAs) are a promising paradigm to enhance the spatial degrees of freedom of conventional multi-antenna systems by flexibly adapting the positions of the antenna elements within a given transmit area. In this paper, we model the motion of the MA elements as discrete movements and study the corresponding resource allocation problem for MA-enabled multiuser multiple-input single-output (MISO) communication systems. Specifically, we jointly optimize the beamforming and the MA positions at the base station (BS) for the minimization of the total transmit power while guaranteeing the minimum required signal-to-interference-plus-noise ratio (SINR) of each individual user. To obtain the globally optimal solution to the formulated resource allocation problem, we develop an iterative algorithm capitalizing on the generalized Bender's decomposition with guaranteed convergence. Our numerical results demonstrate that the proposed MA-enabled communication system can significantly reduce the BS transmit power and the number of antenna elements needed to achieve a desired performance compared to state-of-the-art techniques, such as antenna selection. 
Furthermore, we observe that refining the step size of the MA motion driver improves performance at the expense of a higher computational complexity. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2308.02304v1-abstract-full').style.display = 'none'; document.getElementById('2308.02304v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 4 August, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> August 2023. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2307.06634">arXiv:2307.06634</a> <span> [<a href="https://arxiv.org/pdf/2307.06634">pdf</a>, <a href="https://arxiv.org/ps/2307.06634">ps</a>, <a href="https://arxiv.org/format/2307.06634">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Signal Processing">eess.SP</span> </div> </div> <p class="title is-5 mathjax"> Coherent Compensation based ISAC Signal Processing for Long-range Sensing </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&query=Wang%2C+L">Lin Wang</a>, <a href="/search/eess?searchtype=author&query=Wei%2C+Z">Zhiqing Wei</a>, <a href="/search/eess?searchtype=author&query=Su%2C+L">Liyan Su</a>, <a href="/search/eess?searchtype=author&query=Feng%2C+Z">Zhiyong Feng</a>, <a href="/search/eess?searchtype=author&query=Wu%2C+H">Huici Wu</a>, <a href="/search/eess?searchtype=author&query=Xue%2C+D">Dongsheng Xue</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2307.06634v1-abstract-short" style="display: inline;"> Integrated sensing and communication (ISAC) will greatly enhance 
the efficiency of physical resource utilization. The design of ISAC signal based on the orthogonal frequency division multiplex (OFDM) signal is the mainstream. However, when detecting the long-range target, the delay of echo signal exceeds CP duration, which will result in inter-symbol interference (ISI) and inter-carrier interferen… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2307.06634v1-abstract-full').style.display = 'inline'; document.getElementById('2307.06634v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2307.06634v1-abstract-full" style="display: none;"> Integrated sensing and communication (ISAC) will greatly enhance the efficiency of physical resource utilization. The design of ISAC signal based on the orthogonal frequency division multiplex (OFDM) signal is the mainstream. However, when detecting the long-range target, the delay of echo signal exceeds CP duration, which will result in inter-symbol interference (ISI) and inter-carrier interference (ICI), limiting the sensing range. Facing the above problem, we propose to increase useful signal power through coherent compensation and improve the signal to interference plus noise power ratio (SINR) of each OFDM block. Compared with the traditional 2D-FFT algorithm, the improvement of SINR of range-doppler map (RDM) is verified by simulation, which will expand the sensing range. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2307.06634v1-abstract-full').style.display = 'none'; document.getElementById('2307.06634v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 13 July, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> July 2023. 
</p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2306.10476">arXiv:2306.10476</a> <span> [<a href="https://arxiv.org/pdf/2306.10476">pdf</a>, <a href="https://arxiv.org/format/2306.10476">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Systems and Control">eess.SY</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computer Science and Game Theory">cs.GT</span> </div> </div> <p class="title is-5 mathjax"> Bid Optimization for Offsite Display Ad Campaigns on eCommerce </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&query=Li%2C+H">Hangjian Li</a>, <a href="/search/eess?searchtype=author&query=Xu%2C+D">Dong Xu</a>, <a href="/search/eess?searchtype=author&query=Shmakov%2C+K">Konstantin Shmakov</a>, <a href="/search/eess?searchtype=author&query=Lee%2C+K">Kuang-Chih Lee</a>, <a href="/search/eess?searchtype=author&query=Shen%2C+W">Wei Shen</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2306.10476v2-abstract-short" style="display: inline;"> Online retailers often use third-party demand-side-platforms (DSPs) to conduct offsite advertising and reach shoppers across the Internet on behalf of their advertisers. The process involves the retailer participating in instant auctions with real-time bidding for each ad slot of their interest. 
In this paper, we introduce a bid optimization system that leverages the dimensional bidding function p… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2306.10476v2-abstract-full').style.display = 'inline'; document.getElementById('2306.10476v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2306.10476v2-abstract-full" style="display: none;"> Online retailers often use third-party demand-side-platforms (DSPs) to conduct offsite advertising and reach shoppers across the Internet on behalf of their advertisers. The process involves the retailer participating in instant auctions with real-time bidding for each ad slot of their interest. In this paper, we introduce a bid optimization system that leverages the dimensional bidding function provided by most well-known DSPs for Walmart offsite display ad campaigns. The system starts by automatically searching for the optimal segmentation of the ad requests space based on their characteristics such as geo location, time, ad format, serving website, device type, etc. Then, it assesses the quality of impressions observed from each dimension based on revenue signals driven by the campaign effect. During the campaign, the system iteratively approximates the bid landscape based on the data observed and calculates the bid adjustments for each dimension. Finally, a higher bid adjustment factor is applied to dimensions with potentially higher revenue over ad spend (ROAS), and vice versa. The initial A/B test results of the proposed optimization system has shown its effectiveness of increasing the ROAS and conversion rate while reducing the effective cost per mille for ad serving. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2306.10476v2-abstract-full').style.display = 'none'; document.getElementById('2306.10476v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 11 August, 2023; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 18 June, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> June 2023. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Journal ref:</span> Workshop on Decision Intelligence and Analytics for Online Marketplaces, KDD 2023 </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2305.10655">arXiv:2305.10655</a> <span> [<a href="https://arxiv.org/pdf/2305.10655">pdf</a>, <a href="https://arxiv.org/format/2305.10655">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Image and Video Processing">eess.IV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> <div class="is-inline-block" style="margin-left: 0.5rem"> <div class="tags has-addons"> <span class="tag is-dark is-size-7">doi</span> <span class="tag is-light is-size-7"><a class="" href="https://doi.org/10.1007/978-3-031-17027-0_2">10.1007/978-3-031-17027-0_2 <i class="fa fa-external-link" aria-hidden="true"></i></a></span> </div> </div> </div> <p class="title is-5 mathjax"> DeepEdit: Deep Editable Learning for Interactive Segmentation of 3D Medical Images </p> <p class="authors"> <span class="search-hit">Authors:</span> <a 
href="/search/eess?searchtype=author&query=Diaz-Pinto%2C+A">Andres Diaz-Pinto</a>, <a href="/search/eess?searchtype=author&query=Mehta%2C+P">Pritesh Mehta</a>, <a href="/search/eess?searchtype=author&query=Alle%2C+S">Sachidanand Alle</a>, <a href="/search/eess?searchtype=author&query=Asad%2C+M">Muhammad Asad</a>, <a href="/search/eess?searchtype=author&query=Brown%2C+R">Richard Brown</a>, <a href="/search/eess?searchtype=author&query=Nath%2C+V">Vishwesh Nath</a>, <a href="/search/eess?searchtype=author&query=Ihsani%2C+A">Alvin Ihsani</a>, <a href="/search/eess?searchtype=author&query=Antonelli%2C+M">Michela Antonelli</a>, <a href="/search/eess?searchtype=author&query=Palkovics%2C+D">Daniel Palkovics</a>, <a href="/search/eess?searchtype=author&query=Pinter%2C+C">Csaba Pinter</a>, <a href="/search/eess?searchtype=author&query=Alkalay%2C+R">Ron Alkalay</a>, <a href="/search/eess?searchtype=author&query=Pieper%2C+S">Steve Pieper</a>, <a href="/search/eess?searchtype=author&query=Roth%2C+H+R">Holger R. Roth</a>, <a href="/search/eess?searchtype=author&query=Xu%2C+D">Daguang Xu</a>, <a href="/search/eess?searchtype=author&query=Dogra%2C+P">Prerna Dogra</a>, <a href="/search/eess?searchtype=author&query=Vercauteren%2C+T">Tom Vercauteren</a>, <a href="/search/eess?searchtype=author&query=Feng%2C+A">Andrew Feng</a>, <a href="/search/eess?searchtype=author&query=Quraini%2C+A">Abood Quraini</a>, <a href="/search/eess?searchtype=author&query=Ourselin%2C+S">Sebastien Ourselin</a>, <a href="/search/eess?searchtype=author&query=Cardoso%2C+M+J">M. Jorge Cardoso</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2305.10655v1-abstract-short" style="display: inline;"> Automatic segmentation of medical images is a key step for diagnostic and interventional tasks. 
However, achieving this requires large amounts of annotated volumes, which can be tedious and time-consuming task for expert annotators. In this paper, we introduce DeepEdit, a deep learning-based method for volumetric medical image annotation, that allows automatic and semi-automatic segmentation, and… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2305.10655v1-abstract-full').style.display = 'inline'; document.getElementById('2305.10655v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2305.10655v1-abstract-full" style="display: none;"> Automatic segmentation of medical images is a key step for diagnostic and interventional tasks. However, achieving this requires large amounts of annotated volumes, which can be tedious and time-consuming task for expert annotators. In this paper, we introduce DeepEdit, a deep learning-based method for volumetric medical image annotation, that allows automatic and semi-automatic segmentation, and click-based refinement. DeepEdit combines the power of two methods: a non-interactive (i.e. automatic segmentation using nnU-Net, UNET or UNETR) and an interactive segmentation method (i.e. DeepGrow), into a single deep learning model. It allows easy integration of uncertainty-based ranking strategies (i.e. aleatoric and epistemic uncertainty computation) and active learning. We propose and implement a method for training DeepEdit by using standard training combined with user interaction simulation. Once trained, DeepEdit allows clinicians to quickly segment their datasets by using the algorithm in auto segmentation mode or by providing clicks via a user interface (i.e. 3D Slicer, OHIF). 
We show the value of DeepEdit through evaluation on the PROSTATEx dataset for prostate/prostatic lesions and the Multi-Atlas Labeling Beyond the Cranial Vault (BTCV) dataset for abdominal CT segmentation, using state-of-the-art network architectures as baseline for comparison. DeepEdit could reduce the time and effort annotating 3D medical images compared to DeepGrow alone. Source code is available at https://github.com/Project-MONAI/MONAILabel <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2305.10655v1-abstract-full').style.display = 'none'; document.getElementById('2305.10655v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 17 May, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> May 2023. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2305.09302">arXiv:2305.09302</a> <span> [<a href="https://arxiv.org/pdf/2305.09302">pdf</a>, <a href="https://arxiv.org/format/2305.09302">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Audio and Speech Processing">eess.AS</span> </div> </div> <p class="title is-5 mathjax"> Pink-Eggs Dataset V1: A Step Toward Invasive Species Management Using Deep Learning Embedded Solutions </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&query=Xu%2C+D">Di Xu</a>, <a href="/search/eess?searchtype=author&query=Zhao%2C+Y">Yang Zhao</a>, <a href="/search/eess?searchtype=author&query=Hao%2C+X">Xiang 
Hao</a>, <a href="/search/eess?searchtype=author&query=Meng%2C+X">Xin Meng</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2305.09302v1-abstract-short" style="display: inline;"> We introduce a novel dataset consisting of images depicting pink eggs that have been identified as Pomacea canaliculata eggs, accompanied by corresponding bounding box annotations. The purpose of this dataset is to aid researchers in the analysis of the spread of Pomacea canaliculata species by utilizing deep learning techniques, as well as supporting other investigative pursuits that require visu… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2305.09302v1-abstract-full').style.display = 'inline'; document.getElementById('2305.09302v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2305.09302v1-abstract-full" style="display: none;"> We introduce a novel dataset consisting of images depicting pink eggs that have been identified as Pomacea canaliculata eggs, accompanied by corresponding bounding box annotations. The purpose of this dataset is to aid researchers in the analysis of the spread of Pomacea canaliculata species by utilizing deep learning techniques, as well as supporting other investigative pursuits that require visual data pertaining to the eggs of Pomacea canaliculata. It is worth noting, however, that the identity of the eggs in question is not definitively established, as other species within the same taxonomic family have been observed to lay similar-looking eggs in regions of the Americas. Therefore, a crucial prerequisite to any decision regarding the elimination of these eggs would be to establish with certainty whether they are exclusively attributable to invasive Pomacea canaliculata or if other species are also involved. 
The dataset is available at https://www.kaggle.com/datasets/deeshenzhen/pinkeggs <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2305.09302v1-abstract-full').style.display = 'none'; document.getElementById('2305.09302v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 16 May, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> May 2023. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Report number:</span> 02 </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2305.06879">arXiv:2305.06879</a> <span> [<a href="https://arxiv.org/pdf/2305.06879">pdf</a>, <a href="https://arxiv.org/ps/2305.06879">ps</a>, <a href="https://arxiv.org/format/2305.06879">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Optimization and Control">math.OC</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Signal Processing">eess.SP</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Numerical Analysis">math.NA</span> </div> <div class="is-inline-block" style="margin-left: 0.5rem"> <div class="tags has-addons"> <span class="tag is-dark is-size-7">doi</span> <span class="tag is-light is-size-7"><a class="" href="https://doi.org/10.1109/TSP.2023.3328053">10.1109/TSP.2023.3328053 <i class="fa fa-external-link" aria-hidden="true"></i></a></span> </div> </div> </div> <p class="title is-5 mathjax"> Convex Quaternion Optimization for Signal Processing: Theory and Applications </p> <p class="authors"> <span class="search-hit">Authors:</span> <a 
href="/search/eess?searchtype=author&query=Sun%2C+S">Shuning Sun</a>, <a href="/search/eess?searchtype=author&query=Diao%2C+Q">Qiankun Diao</a>, <a href="/search/eess?searchtype=author&query=Xu%2C+D">Dongpo Xu</a>, <a href="/search/eess?searchtype=author&query=Bourigault%2C+P">Pauline Bourigault</a>, <a href="/search/eess?searchtype=author&query=Mandic%2C+D+P">Danilo P. Mandic</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2305.06879v1-abstract-short" style="display: inline;"> Convex optimization methods have been extensively used in the fields of communications and signal processing. However, the theory of quaternion optimization is currently not as fully developed and systematic as that of complex and real optimization. To this end, we establish an essential theory of convex quaternion optimization for signal processing based on the generalized Hamilton-real (GHR) cal… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2305.06879v1-abstract-full').style.display = 'inline'; document.getElementById('2305.06879v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2305.06879v1-abstract-full" style="display: none;"> Convex optimization methods have been extensively used in the fields of communications and signal processing. However, the theory of quaternion optimization is currently not as fully developed and systematic as that of complex and real optimization. To this end, we establish an essential theory of convex quaternion optimization for signal processing based on the generalized Hamilton-real (GHR) calculus. This is achieved in a way which conforms with traditional complex and real optimization theory. 
For rigorous, We present five discriminant theorems for convex quaternion functions, and four discriminant criteria for strongly convex quaternion functions. Furthermore, we provide a fundamental theorem for the optimality of convex quaternion optimization problems, and demonstrate its utility through three applications in quaternion signal processing. These results provide a solid theoretical foundation for convex quaternion optimization and open avenues for further developments in signal processing applications. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2305.06879v1-abstract-full').style.display = 'none'; document.getElementById('2305.06879v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 9 May, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> May 2023. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Journal ref:</span> IEEE Trans. Signal Process., vol. 71, pp. 4106-4115, Oct. 
2023 </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2305.05265">arXiv:2305.05265</a> <span> [<a href="https://arxiv.org/pdf/2305.05265">pdf</a>, <a href="https://arxiv.org/ps/2305.05265">ps</a>, <a href="https://arxiv.org/format/2305.05265">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Signal Processing">eess.SP</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Systems and Control">eess.SY</span> </div> </div> <p class="title is-5 mathjax"> Joint BS Selection, User Association, and Beamforming Design for Network Integrated Sensing and Communication </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&query=Xu%2C+Y">Yiming Xu</a>, <a href="/search/eess?searchtype=author&query=Xu%2C+D">Dongfang Xu</a>, <a href="/search/eess?searchtype=author&query=Xie%2C+L">Lei Xie</a>, <a href="/search/eess?searchtype=author&query=Song%2C+S">Shenghui Song</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2305.05265v1-abstract-short" style="display: inline;"> Different from conventional radar, the cellular network in the integrated sensing and communication (ISAC) system enables collaborative sensing by multiple sensing nodes, e.g., base stations (BSs). However, existing works normally assume designated BSs as the sensing nodes, and thus can't fully exploit the macro-diversity gain. 
In the paper, we propose a joint BS selection, user association, and b… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2305.05265v1-abstract-full').style.display = 'inline'; document.getElementById('2305.05265v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2305.05265v1-abstract-full" style="display: none;"> Different from conventional radar, the cellular network in the integrated sensing and communication (ISAC) system enables collaborative sensing by multiple sensing nodes, e.g., base stations (BSs). However, existing works normally assume designated BSs as the sensing nodes, and thus can't fully exploit the macro-diversity gain. In the paper, we propose a joint BS selection, user association, and beamforming design to tackle this problem. The total transmit power is minimized while guaranteeing the communication and sensing performance measured by the signal-to-interference-plus-noise ratio (SINR) for the communication users and the Cramer-Rao lower bound (CRLB) for location estimation, respectively. An alternating optimization (AO)-based algorithm is developed to solve the non-convex problem. Simulation results validate the effectiveness of the proposed algorithm and unveil the benefits brought by collaborative sensing and BS selection. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2305.05265v1-abstract-full').style.display = 'none'; document.getElementById('2305.05265v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 9 May, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> May 2023. 
</p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">6 pages</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2305.01213">arXiv:2305.01213</a> <span> [<a href="https://arxiv.org/pdf/2305.01213">pdf</a>, <a href="https://arxiv.org/ps/2305.01213">ps</a>, <a href="https://arxiv.org/format/2305.01213">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Information Theory">cs.IT</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Signal Processing">eess.SP</span> </div> </div> <p class="title is-5 mathjax"> Integrated Sensing and Communication in Coordinated Cellular Networks </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&query=Xu%2C+D">Dongfang Xu</a>, <a href="/search/eess?searchtype=author&query=Liu%2C+C">Chang Liu</a>, <a href="/search/eess?searchtype=author&query=Song%2C+S">Shenghui Song</a>, <a href="/search/eess?searchtype=author&query=Ng%2C+D+W+K">Derrick Wing Kwan Ng</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2305.01213v2-abstract-short" style="display: inline;"> Integrated sensing and communication (ISAC) is a promising technique to provide sensing services in future wireless networks. Numerous existing works have adopted a monostatic radar architecture to realize ISAC, i.e., employing the same base station (BS) to transmit the ISAC signal and receive the echo. 
Yet, the concurrent information transmission causes unavoidable self-interference (SI) to the r… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2305.01213v2-abstract-full').style.display = 'inline'; document.getElementById('2305.01213v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2305.01213v2-abstract-full" style="display: none;"> Integrated sensing and communication (ISAC) is a promising technique to provide sensing services in future wireless networks. Numerous existing works have adopted a monostatic radar architecture to realize ISAC, i.e., employing the same base station (BS) to transmit the ISAC signal and receive the echo. Yet, the concurrent information transmission causes unavoidable self-interference (SI) to the radar echo at the BS. To overcome this difficulty, we propose a coordinated cellular network-supported multistatic radar architecture to implement ISAC, which allows us to spatially separate the ISAC signal transmission and radar echo reception, intrinsically circumventing the problem of SI. To this end, we jointly optimize the transmit and receive beamforming policy to minimize the sensing beam pattern mismatch error subject to ISAC quality-of-service requirements. The resulting non-convex optimization problem is tackled by an alternating optimization-based suboptimal algorithm. Simulation results showed that the proposed scheme outperforms the two baseline schemes adopting conventional designs. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2305.01213v2-abstract-full').style.display = 'none'; document.getElementById('2305.01213v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 16 May, 2023; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 2 May, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> May 2023. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">6 pages, 3 figures</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2304.11521">arXiv:2304.11521</a> <span> [<a href="https://arxiv.org/pdf/2304.11521">pdf</a>, <a href="https://arxiv.org/format/2304.11521">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Sound">cs.SD</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Multimedia">cs.MM</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Audio and Speech Processing">eess.AS</span> </div> </div> <p class="title is-5 mathjax"> An Order-Complexity Model for Aesthetic Quality Assessment of Homophony Music Performance </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&query=Jin%2C+X">Xin Jin</a>, <a href="/search/eess?searchtype=author&query=Zhou%2C+W">Wu Zhou</a>, <a href="/search/eess?searchtype=author&query=Wang%2C+J">Jinyu Wang</a>, <a href="/search/eess?searchtype=author&query=Xu%2C+D">Duo Xu</a>, <a 
href="/search/eess?searchtype=author&query=Rong%2C+Y">Yiqing Rong</a>, <a href="/search/eess?searchtype=author&query=Sun%2C+J">Jialin Sun</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2304.11521v1-abstract-short" style="display: inline;"> Although computational aesthetics evaluation has made certain achievements in many fields, its research of music performance remains to be explored. At present, subjective evaluation is still a ultimate method of music aesthetics research, but it will consume a lot of human and material resources. In addition, the music performance generated by AI is still mechanical, monotonous and lacking in bea… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2304.11521v1-abstract-full').style.display = 'inline'; document.getElementById('2304.11521v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2304.11521v1-abstract-full" style="display: none;"> Although computational aesthetics evaluation has made certain achievements in many fields, its research of music performance remains to be explored. At present, subjective evaluation is still a ultimate method of music aesthetics research, but it will consume a lot of human and material resources. In addition, the music performance generated by AI is still mechanical, monotonous and lacking in beauty. In order to guide the generation task of AI music performance, and to improve the performance effect of human performers, this paper uses Birkhoff's aesthetic measure to propose a method of objective measurement of beauty. The main contributions of this paper are as follows: Firstly, we put forward an objective aesthetic evaluation method to measure the music performance aesthetic; Secondly, we propose 10 basic music features and 4 aesthetic music features. 
Experiments show that our method performs well on performance assessment. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2304.11521v1-abstract-full').style.display = 'none'; document.getElementById('2304.11521v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 22 April, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> April 2023. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Journal ref:</span> AIART 2023 ICME Workshop </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2304.07106">arXiv:2304.07106</a> <span> [<a href="https://arxiv.org/pdf/2304.07106">pdf</a>, <a href="https://arxiv.org/ps/2304.07106">ps</a>, <a href="https://arxiv.org/format/2304.07106">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Optimization and Control">math.OC</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Systems and Control">eess.SY</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Adaptation and Self-Organizing Systems">nlin.AO</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Chaotic Dynamics">nlin.CD</span> </div> </div> <p class="title is-5 mathjax"> Extremum Seeking Nonlinear Regulator with Concurrent Uncertainties in Exosystems and Control Directions </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&query=Wang%2C+S">Shimin Wang</a>, <a href="/search/eess?searchtype=author&query=Guay%2C+M">Martin Guay</a>, <a href="/search/eess?searchtype=author&query=Xu%2C+D">Dabo Xu</a>, <a href="/search/eess?searchtype=author&query=Dochain%2C+D">Denis 
Dochain</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2304.07106v3-abstract-short" style="display: inline;"> This paper proposes a non-adaptive control solution framework to the practical output regulation problem (PORP) for a class of nonlinear systems with uncertain parameters, unknown control directions and uncertain exosystem dynamics. The concurrence of the unknown control directions and uncertainties in both the system dynamics and the exosystem pose a significant challenge to the problem. In light… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2304.07106v3-abstract-full').style.display = 'inline'; document.getElementById('2304.07106v3-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2304.07106v3-abstract-full" style="display: none;"> This paper proposes a non-adaptive control solution framework to the practical output regulation problem (PORP) for a class of nonlinear systems with uncertain parameters, unknown control directions and uncertain exosystem dynamics. The concurrence of the unknown control directions and uncertainties in both the system dynamics and the exosystem pose a significant challenge to the problem. In light of a nonlinear internal model approach, we first convert the robust PORP into a robust non-adaptive stabilization problem for the augmented system with integral Input-to-State Stable (iISS) inverse dynamics. By employing an extremum-seeking control (ESC) approach, the construction of our solution method avoids the use of Nussbaum-type gain techniques to address the robust PORP subject to unknown control directions with time-varying coefficients. 
The stability of the non-adaptive output regulation design is proven via a Lie bracket averaging technique where uniform ultimate boundedness of the closed-loop signals is guaranteed. As a result, both the estimation and tracking errors converge to zero exponentially, provided that the frequency of the dither signal goes to infinity. Finally, a simulation example with unknown coefficients is provided to exemplify the validity of the proposed control solution frameworks. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2304.07106v3-abstract-full').style.display = 'none'; document.getElementById('2304.07106v3-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 8 May, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 14 April, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> April 2023. 
</p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">11 pages, 7 figures</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2303.12136">arXiv:2303.12136</a> <span> [<a href="https://arxiv.org/pdf/2303.12136">pdf</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Image and Video Processing">eess.IV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Optics">physics.optics</span> </div> </div> <p class="title is-5 mathjax"> Improving Fabrication Fidelity of Integrated Nanophotonic Devices Using Deep Learning </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&query=Gostimirovic%2C+D">Dusan Gostimirovic</a>, <a href="/search/eess?searchtype=author&query=Grinberg%2C+Y">Yuri Grinberg</a>, <a href="/search/eess?searchtype=author&query=Xu%2C+D">Dan-Xia Xu</a>, <a href="/search/eess?searchtype=author&query=Liboiron-Ladouceur%2C+O">Odile Liboiron-Ladouceur</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2303.12136v1-abstract-short" style="display: inline;"> Next-generation integrated nanophotonic device designs leverage advanced optimization techniques such as inverse design and topology optimization which achieve high performance and extreme miniaturization by optimizing a massively complex design space enabled by small feature sizes. 
However, unless the optimization is heavily constrained, the generated small features are not reliably fabricated, l… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2303.12136v1-abstract-full').style.display = 'inline'; document.getElementById('2303.12136v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2303.12136v1-abstract-full" style="display: none;"> Next-generation integrated nanophotonic device designs leverage advanced optimization techniques such as inverse design and topology optimization which achieve high performance and extreme miniaturization by optimizing a massively complex design space enabled by small feature sizes. However, unless the optimization is heavily constrained, the generated small features are not reliably fabricated, leading to optical performance degradation. Even for simpler, conventional designs, fabrication-induced performance degradation still occurs. The degree of deviation from the original design not only depends on the size and shape of its features, but also on the distribution of features and the surrounding environment, presenting complex, proximity-dependent behavior. Without proprietary fabrication process specifications, design corrections can only be made after calibrating fabrication runs take place. In this work, we introduce a general deep machine learning model that automatically corrects photonic device design layouts prior to first fabrication. Only a small set of scanning electron microscopy images of engineered training features are required to create the deep learning model. With correction, the outcome of the fabricated layout is closer to what is intended, and thus so too is the performance of the design. 
Without modifying the nanofabrication process, adding significant computation in design, or requiring proprietary process specifications, we believe our model opens the door to new levels of reliability and performance in next-generation photonic circuits. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2303.12136v1-abstract-full').style.display = 'none'; document.getElementById('2303.12136v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 21 March, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> March 2023. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">13 pages, 8 figures</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2302.11795">arXiv:2302.11795</a> <span> [<a href="https://arxiv.org/pdf/2302.11795">pdf</a>, <a href="https://arxiv.org/format/2302.11795">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Image and Video Processing">eess.IV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> Bridging Synthetic and Real Images: a Transferable and Multiple Consistency aided Fundus Image Enhancement Framework </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&query=Guo%2C+E">Erjian Guo</a>, <a href="/search/eess?searchtype=author&query=Fu%2C+H">Huazhu Fu</a>, <a href="/search/eess?searchtype=author&query=Zhou%2C+L">Luping 
Zhou</a>, <a href="/search/eess?searchtype=author&query=Xu%2C+D">Dong Xu</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2302.11795v1-abstract-short" style="display: inline;"> Deep learning based image enhancement models have largely improved the readability of fundus images in order to decrease the uncertainty of clinical observations and the risk of misdiagnosis. However, due to the difficulty of acquiring paired real fundus images at different qualities, most existing methods have to adopt synthetic image pairs as training data. The domain shift between the synthetic… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2302.11795v1-abstract-full').style.display = 'inline'; document.getElementById('2302.11795v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2302.11795v1-abstract-full" style="display: none;"> Deep learning based image enhancement models have largely improved the readability of fundus images in order to decrease the uncertainty of clinical observations and the risk of misdiagnosis. However, due to the difficulty of acquiring paired real fundus images at different qualities, most existing methods have to adopt synthetic image pairs as training data. The domain shift between the synthetic and the real images inevitably hinders the generalization of such models on clinical data. In this work, we propose an end-to-end optimized teacher-student framework to simultaneously conduct image enhancement and domain adaptation. The student network uses synthetic pairs for supervised enhancement, and regularizes the enhancement model to reduce domain-shift by enforcing teacher-student prediction consistency on the real fundus images without relying on enhanced ground-truth. 
Moreover, we also propose a novel multi-stage multi-attention guided enhancement network (MAGE-Net) as the backbones of our teacher and student network. Our MAGE-Net utilizes multi-stage enhancement module and retinal structure preservation module to progressively integrate the multi-scale features and simultaneously preserve the retinal structures for better fundus image quality enhancement. Comprehensive experiments on both real and synthetic datasets demonstrate that our framework outperforms the baseline approaches. Moreover, our method also benefits the downstream clinical tasks. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2302.11795v1-abstract-full').style.display = 'none'; document.getElementById('2302.11795v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 23 February, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2023. 
</p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2302.10124">arXiv:2302.10124</a> <span> [<a href="https://arxiv.org/pdf/2302.10124">pdf</a>, <a href="https://arxiv.org/format/2302.10124">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Signal Processing">eess.SP</span> </div> </div> <p class="title is-5 mathjax"> Energy-Aware Resource Allocation and Trajectory Design for UAV-Enabled ISAC </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&query=Khalili%2C+A">Ata Khalili</a>, <a href="/search/eess?searchtype=author&query=Rezaei%2C+A">Atefeh Rezaei</a>, <a href="/search/eess?searchtype=author&query=Xu%2C+D">Dongfang Xu</a>, <a href="/search/eess?searchtype=author&query=Schober%2C+R">Robert Schober</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2302.10124v2-abstract-short" style="display: inline;"> In this paper, we investigate joint resource allocation and trajectory design for multi-user multi-target unmanned aerial vehicle (UAV)-enabled integrated sensing and communication (ISAC). 
To improve sensing accuracy, the UAV is forced to hover during sensing. In particular, we jointly optimize the two-dimensional trajectory, velocity, downlink information and sensing beamformers, and sensing indi… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2302.10124v2-abstract-full').style.display = 'inline'; document.getElementById('2302.10124v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2302.10124v2-abstract-full" style="display: none;"> In this paper, we investigate joint resource allocation and trajectory design for multi-user multi-target unmanned aerial vehicle (UAV)-enabled integrated sensing and communication (ISAC). To improve sensing accuracy, the UAV is forced to hover during sensing. In particular, we jointly optimize the two-dimensional trajectory, velocity, downlink information and sensing beamformers, and sensing indicator to minimize the average power consumption of a fixed-altitude UAV, while considering the quality of service of the communication users and the sensing tasks. To tackle the resulting non-convex mixed integer non-linear program (MINLP), we exploit semidefinite relaxation, the big-M method, and successive convex approximation to develop an alternating optimization-based algorithm. Our simulation results demonstrate the significant power savings enabled by the proposed scheme compared to two baseline schemes employing heuristic trajectories. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2302.10124v2-abstract-full').style.display = 'none'; document.getElementById('2302.10124v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 24 October, 2023; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 20 February, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2023. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">This paper has been accepted for presentation at IEEE GLOBECOM 2023</span> </p> </li> </ol> <nav class="pagination is-small is-centered breathe-horizontal" role="navigation" aria-label="pagination"> <a href="" class="pagination-previous is-invisible">Previous </a> <a href="/search/?searchtype=author&query=Xu%2C+D&start=50" class="pagination-next" >Next </a> <ul class="pagination-list"> <li> <a href="/search/?searchtype=author&query=Xu%2C+D&start=0" class="pagination-link is-current" aria-label="Page 1" aria-current="page">1 </a> </li> <li> <a href="/search/?searchtype=author&query=Xu%2C+D&start=50" class="pagination-link " aria-label="Goto page 2">2 </a> </li> <li> <a href="/search/?searchtype=author&query=Xu%2C+D&start=100" class="pagination-link " aria-label="Goto page 3">3 </a> </li> <li> <a href="/search/?searchtype=author&query=Xu%2C+D&start=150" class="pagination-link " aria-label="Goto page 4">4 </a> </li> </ul> </nav> <div class="is-hidden-tablet">  <span class="help" style="display: inline-block;"><a href="https://github.com/arXiv/arxiv-search/releases">Search v0.5.6 released 2020-02-24</a>  </span> </div> </div> </main> <footer> <div class="columns is-desktop" role="navigation" aria-label="Secondary">  <div class="column"> <div 
class="columns"> <div class="column"> <ul class="nav-spaced"> <li><a href="https://info.arxiv.org/about">About</a></li> <li><a href="https://info.arxiv.org/help">Help</a></li> </ul> </div> <div class="column"> <ul class="nav-spaced"> <li> <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 512 512" class="icon filter-black" role="presentation"><title>contact arXiv</title><desc>Click here to contact arXiv</desc><path d="M502.3 190.8c3.9-3.1 9.7-.2 9.7 4.7V400c0 26.5-21.5 48-48 48H48c-26.5 0-48-21.5-48-48V195.6c0-5 5.7-7.8 9.7-4.7 22.4 17.4 52.1 39.5 154.1 113.6 21.1 15.4 56.7 47.8 92.2 47.6 35.7.3 72-32.8 92.3-47.6 102-74.1 131.6-96.3 154-113.7zM256 320c23.2.4 56.6-29.2 73.4-41.4 132.7-96.3 142.8-104.7 173.4-128.7 5.8-4.5 9.2-11.5 9.2-18.9v-19c0-26.5-21.5-48-48-48H48C21.5 64 0 85.5 0 112v19c0 7.4 3.4 14.3 9.2 18.9 30.6 23.9 40.7 32.4 173.4 128.7 16.8 12.2 50.2 41.8 73.4 41.4z"/></svg> <a href="https://info.arxiv.org/help/contact.html"> Contact</a> </li> <li> <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 512 512" class="icon filter-black" role="presentation"><title>subscribe to arXiv mailings</title><desc>Click here to subscribe</desc><path d="M476 3.2L12.5 270.6c-18.1 10.4-15.8 35.6 2.2 43.2L121 358.4l287.3-253.2c5.5-4.9 13.3 2.6 8.6 8.3L176 407v80.5c0 23.6 28.5 32.9 42.5 15.8L282 426l124.6 52.2c14.2 6 30.4-2.9 33-18.2l72-432C515 7.8 493.3-6.8 476 3.2z"/></svg> <a href="https://info.arxiv.org/help/subscribe"> Subscribe</a> </li> </ul> </div> </div> </div>   <div class="column"> <div class="columns"> <div class="column"> <ul class="nav-spaced"> <li><a href="https://info.arxiv.org/help/license/index.html">Copyright</a></li> <li><a href="https://info.arxiv.org/help/policies/privacy_policy.html">Privacy Policy</a></li> </ul> </div> <div class="column sorry-app-links"> <ul class="nav-spaced"> <li><a href="https://info.arxiv.org/help/web_accessibility.html">Web Accessibility Assistance</a></li> <li> <p class="help"> <a class="a11y-main-link" 
href="https://status.arxiv.org" target="_blank">arXiv Operational Status <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 256 512" class="icon filter-dark_grey" role="presentation"><path d="M224.3 273l-136 136c-9.4 9.4-24.6 9.4-33.9 0l-22.6-22.6c-9.4-9.4-9.4-24.6 0-33.9l96.4-96.4-96.4-96.4c-9.4-9.4-9.4-24.6 0-33.9L54.3 103c9.4-9.4 24.6-9.4 33.9 0l136 136c9.5 9.4 9.5 24.6.1 34z"/></svg></a><br> Get status notifications via <a class="is-link" href="https://subscribe.sorryapp.com/24846f03/email/new" target="_blank"><svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 512 512" class="icon filter-black" role="presentation"><path d="M502.3 190.8c3.9-3.1 9.7-.2 9.7 4.7V400c0 26.5-21.5 48-48 48H48c-26.5 0-48-21.5-48-48V195.6c0-5 5.7-7.8 9.7-4.7 22.4 17.4 52.1 39.5 154.1 113.6 21.1 15.4 56.7 47.8 92.2 47.6 35.7.3 72-32.8 92.3-47.6 102-74.1 131.6-96.3 154-113.7zM256 320c23.2.4 56.6-29.2 73.4-41.4 132.7-96.3 142.8-104.7 173.4-128.7 5.8-4.5 9.2-11.5 9.2-18.9v-19c0-26.5-21.5-48-48-48H48C21.5 64 0 85.5 0 112v19c0 7.4 3.4 14.3 9.2 18.9 30.6 23.9 40.7 32.4 173.4 128.7 16.8 12.2 50.2 41.8 73.4 41.4z"/></svg>email</a> or <a class="is-link" href="https://subscribe.sorryapp.com/24846f03/slack/new" target="_blank"><svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 448 512" class="icon filter-black" role="presentation"><path d="M94.12 315.1c0 25.9-21.16 47.06-47.06 47.06S0 341 0 315.1c0-25.9 21.16-47.06 47.06-47.06h47.06v47.06zm23.72 0c0-25.9 21.16-47.06 47.06-47.06s47.06 21.16 47.06 47.06v117.84c0 25.9-21.16 47.06-47.06 47.06s-47.06-21.16-47.06-47.06V315.1zm47.06-188.98c-25.9 0-47.06-21.16-47.06-47.06S139 32 164.9 32s47.06 21.16 47.06 47.06v47.06H164.9zm0 23.72c25.9 0 47.06 21.16 47.06 47.06s-21.16 47.06-47.06 47.06H47.06C21.16 243.96 0 222.8 0 196.9s21.16-47.06 47.06-47.06H164.9zm188.98 47.06c0-25.9 21.16-47.06 47.06-47.06 25.9 0 47.06 21.16 47.06 47.06s-21.16 47.06-47.06 47.06h-47.06V196.9zm-23.72 0c0 25.9-21.16 47.06-47.06 47.06-25.9 0-47.06-21.16-47.06-47.06V79.06c0-25.9 
21.16-47.06 47.06-47.06 25.9 0 47.06 21.16 47.06 47.06V196.9zM283.1 385.88c25.9 0 47.06 21.16 47.06 47.06 0 25.9-21.16 47.06-47.06 47.06-25.9 0-47.06-21.16-47.06-47.06v-47.06h47.06zm0-23.72c-25.9 0-47.06-21.16-47.06-47.06 0-25.9 21.16-47.06 47.06-47.06h117.84c25.9 0 47.06 21.16 47.06 47.06 0 25.9-21.16 47.06-47.06 47.06H283.1z"/></svg>slack</a> </p> </li> </ul> </div> </div> </div>  </div> </footer> <script src="https://static.arxiv.org/static/base/1.0.0a5/js/member_acknowledgement.js"></script> </body> </html>

CINXE.COM

Search | arXiv e-print repository