<!-- Removed extraction artifacts: a stray "CINXE.COM" marker and a duplicated
     page title ("Search | arXiv e-print repository") appeared before the doctype.
     Only comments are permitted ahead of <!DOCTYPE html>. -->
<!DOCTYPE html> <html lang="en"> <head> <meta charset="utf-8"/> <meta name="viewport" content="width=device-width, initial-scale=1"/> <!-- new favicon config and versions by realfavicongenerator.net --> <link rel="apple-touch-icon" sizes="180x180" href="https://static.arxiv.org/static/base/1.0.0a5/images/icons/apple-touch-icon.png"> <link rel="icon" type="image/png" sizes="32x32" href="https://static.arxiv.org/static/base/1.0.0a5/images/icons/favicon-32x32.png"> <link rel="icon" type="image/png" sizes="16x16" href="https://static.arxiv.org/static/base/1.0.0a5/images/icons/favicon-16x16.png"> <link rel="manifest" href="https://static.arxiv.org/static/base/1.0.0a5/images/icons/site.webmanifest"> <link rel="mask-icon" href="https://static.arxiv.org/static/base/1.0.0a5/images/icons/safari-pinned-tab.svg" color="#b31b1b"> <link rel="shortcut icon" href="https://static.arxiv.org/static/base/1.0.0a5/images/icons/favicon.ico"> <meta name="msapplication-TileColor" content="#b31b1b"> <meta name="msapplication-config" content="images/icons/browserconfig.xml"> <meta name="theme-color" content="#b31b1b"> <!-- end favicon config --> <title>Search | arXiv e-print repository</title> <script defer src="https://static.arxiv.org/static/base/1.0.0a5/fontawesome-free-5.11.2-web/js/all.js"></script> <link rel="stylesheet" href="https://static.arxiv.org/static/base/1.0.0a5/css/arxivstyle.css" /> <script type="text/x-mathjax-config"> MathJax.Hub.Config({ messageStyle: "none", extensions: ["tex2jax.js"], jax: ["input/TeX", "output/HTML-CSS"], tex2jax: { inlineMath: [ ['$','$'], ["\\(","\\)"] ], displayMath: [ ['$$','$$'], ["\\[","\\]"] ], processEscapes: true, ignoreClass: '.*', processClass: 'mathjax.*' }, TeX: { extensions: ["AMSmath.js", "AMSsymbols.js", "noErrors.js"], noErrors: { inlineDelimiters: ["$","$"], multiLine: false, style: { "font-size": "normal", "border": "" } } }, "HTML-CSS": { availableFonts: ["TeX"] } }); </script> <script 
src="https://static.arxiv.org/MathJax-2.7.3/MathJax.js"></script>
<!-- Fix: MathJax was loaded via a protocol-relative URL (//static.arxiv.org/...),
     which breaks when the page is opened from file:// and is inconsistent with
     every other asset on the page; pinned to https. -->
<script src="https://static.arxiv.org/static/base/1.0.0a5/js/notification.js"></script>
<link rel="stylesheet" href="https://static.arxiv.org/static/search/0.5.6/css/bulma-tooltip.min.css" />
<link rel="stylesheet" href="https://static.arxiv.org/static/search/0.5.6/css/search.css" />
<script src="https://code.jquery.com/jquery-3.2.1.slim.min.js" integrity="sha256-k2WSCIexGzOj3Euiig+TlR8gA0EmPjuc79OEeY5L45g=" crossorigin="anonymous"></script>
<script src="https://static.arxiv.org/static/search/0.5.6/js/fieldset.js"></script>
<!-- Fix: "radio" is not an HTML element name, so the original selector
     `radio#cf-customfield_11400` could never match anything; target the
     input's id directly. -->
<style> input#cf-customfield_11400 { display: none; } </style>
</head>
<body>
<header><a href="#main-container" class="is-sr-only">Skip to main content</a>
<!-- contains Cornell logo and sponsor statement -->
<div class="attribution level is-marginless" role="banner">
<div class="level-left">
<a class="level-item" href="https://cornell.edu/"><img src="https://static.arxiv.org/static/base/1.0.0a5/images/cornell-reduced-white-SMALL.svg" alt="Cornell University" width="200" aria-label="logo" /></a>
</div>
<div class="level-right is-marginless"><p class="sponsors level-item is-marginless"><span id="support-ack-url">We gratefully acknowledge support from<br />
the Simons Foundation, <a href="https://info.arxiv.org/about/ourmembers.html">member institutions</a>, and all contributors.
<a href="https://info.arxiv.org/about/donate.html">Donate</a></span></p></div> </div> <!-- contains arXiv identity and search bar --> <div class="identity level is-marginless"> <div class="level-left"> <div class="level-item"> <a class="arxiv" href="https://arxiv.org/" aria-label="arxiv-logo"> <img src="https://static.arxiv.org/static/base/1.0.0a5/images/arxiv-logo-one-color-white.svg" aria-label="logo" alt="arxiv logo" width="85" style="width:85px;"/> </a> </div> </div> <div class="search-block level-right"> <form class="level-item mini-search" method="GET" action="https://arxiv.org/search"> <div class="field has-addons"> <div class="control"> <input class="input is-small" type="text" name="query" placeholder="Search..." aria-label="Search term or terms" /> <p class="help"><a href="https://info.arxiv.org/help">Help</a> | <a href="https://arxiv.org/search/advanced">Advanced Search</a></p> </div> <div class="control"> <div class="select is-small"> <select name="searchtype" aria-label="Field to search"> <option value="all" selected="selected">All fields</option> <option value="title">Title</option> <option value="author">Author</option> <option value="abstract">Abstract</option> <option value="comments">Comments</option> <option value="journal_ref">Journal reference</option> <option value="acm_class">ACM classification</option> <option value="msc_class">MSC classification</option> <option value="report_num">Report number</option> <option value="paper_id">arXiv identifier</option> <option value="doi">DOI</option> <option value="orcid">ORCID</option> <option value="author_id">arXiv author ID</option> <option value="help">Help pages</option> <option value="full_text">Full text</option> </select> </div> </div> <input type="hidden" name="source" value="header"> <button class="button is-small is-cul-darker">Search</button> </div> </form> </div> </div> <!-- closes identity --> <div class="container"> <div class="user-tools is-size-7 has-text-right has-text-weight-bold" 
role="navigation" aria-label="User menu"> <a href="https://arxiv.org/login">Login</a> </div> </div> </header> <main class="container" id="main-container"> <div class="level is-marginless"> <div class="level-left"> <h1 class="title is-clearfix"> Showing 1&ndash;50 of 360 results for author: <span class="mathjax">Shi, Y</span> </h1> </div> <div class="level-right is-hidden-mobile"> <!-- feedback for mobile is moved to footer --> <span class="help" style="display: inline-block;"><a href="https://github.com/arXiv/arxiv-search/releases">Search v0.5.6 released 2020-02-24</a>&nbsp;&nbsp;</span> </div> </div> <div class="content"> <form method="GET" action="/search/eess" aria-role="search"> Searching in archive <strong>eess</strong>. <a href="/search/?searchtype=author&amp;query=Shi%2C+Y">Search in all archives.</a> <div class="field has-addons-tablet"> <div class="control is-expanded"> <label for="query" class="hidden-label">Search term or terms</label> <input class="input is-medium" id="query" name="query" placeholder="Search term..." 
type="text" value="Shi, Y"> </div> <div class="select control is-medium"> <label class="is-hidden" for="searchtype">Field</label> <select class="is-medium" id="searchtype" name="searchtype"><option value="all">All fields</option><option value="title">Title</option><option selected value="author">Author(s)</option><option value="abstract">Abstract</option><option value="comments">Comments</option><option value="journal_ref">Journal reference</option><option value="acm_class">ACM classification</option><option value="msc_class">MSC classification</option><option value="report_num">Report number</option><option value="paper_id">arXiv identifier</option><option value="doi">DOI</option><option value="orcid">ORCID</option><option value="license">License (URI)</option><option value="author_id">arXiv author ID</option><option value="help">Help pages</option><option value="full_text">Full text</option></select> </div> <div class="control"> <button class="button is-link is-medium">Search</button> </div> </div> <div class="field"> <div class="control is-size-7"> <label class="radio"> <input checked id="abstracts-0" name="abstracts" type="radio" value="show"> Show abstracts </label> <label class="radio"> <input id="abstracts-1" name="abstracts" type="radio" value="hide"> Hide abstracts </label> </div> </div> <div class="is-clearfix" style="height: 2.5em"> <div class="is-pulled-right"> <a href="/search/advanced?terms-0-term=Shi%2C+Y&amp;terms-0-field=author&amp;size=50&amp;order=-announced_date_first">Advanced Search</a> </div> </div> <input type="hidden" name="order" value="-announced_date_first"> <input type="hidden" name="size" value="50"> </form> <div class="level breathe-horizontal"> <div class="level-left"> <form method="GET" action="/search/"> <div style="display: none;"> <select id="searchtype" name="searchtype"><option value="all">All fields</option><option value="title">Title</option><option selected value="author">Author(s)</option><option 
value="abstract">Abstract</option><option value="comments">Comments</option><option value="journal_ref">Journal reference</option><option value="acm_class">ACM classification</option><option value="msc_class">MSC classification</option><option value="report_num">Report number</option><option value="paper_id">arXiv identifier</option><option value="doi">DOI</option><option value="orcid">ORCID</option><option value="license">License (URI)</option><option value="author_id">arXiv author ID</option><option value="help">Help pages</option><option value="full_text">Full text</option></select> <input id="query" name="query" type="text" value="Shi, Y"> <ul id="abstracts"><li><input checked id="abstracts-0" name="abstracts" type="radio" value="show"> <label for="abstracts-0">Show abstracts</label></li><li><input id="abstracts-1" name="abstracts" type="radio" value="hide"> <label for="abstracts-1">Hide abstracts</label></li></ul> </div> <div class="box field is-grouped is-grouped-multiline level-item"> <div class="control"> <span class="select is-small"> <select id="size" name="size"><option value="25">25</option><option selected value="50">50</option><option value="100">100</option><option value="200">200</option></select> </span> <label for="size">results per page</label>. 
</div> <div class="control"> <label for="order">Sort results by</label> <span class="select is-small"> <select id="order" name="order"><option selected value="-announced_date_first">Announcement date (newest first)</option><option value="announced_date_first">Announcement date (oldest first)</option><option value="-submitted_date">Submission date (newest first)</option><option value="submitted_date">Submission date (oldest first)</option><option value="">Relevance</option></select> </span> </div> <div class="control"> <button class="button is-small is-link">Go</button> </div> </div> </form> </div> </div> <nav class="pagination is-small is-centered breathe-horizontal" role="navigation" aria-label="pagination"> <a href="" class="pagination-previous is-invisible">Previous </a> <a href="/search/?searchtype=author&amp;query=Shi%2C+Y&amp;start=50" class="pagination-next" >Next </a> <ul class="pagination-list"> <li> <a href="/search/?searchtype=author&amp;query=Shi%2C+Y&amp;start=0" class="pagination-link is-current" aria-label="Goto page 1">1 </a> </li> <li> <a href="/search/?searchtype=author&amp;query=Shi%2C+Y&amp;start=50" class="pagination-link " aria-label="Page 2" aria-current="page">2 </a> </li> <li> <a href="/search/?searchtype=author&amp;query=Shi%2C+Y&amp;start=100" class="pagination-link " aria-label="Page 3" aria-current="page">3 </a> </li> <li> <a href="/search/?searchtype=author&amp;query=Shi%2C+Y&amp;start=150" class="pagination-link " aria-label="Page 4" aria-current="page">4 </a> </li> <li> <a href="/search/?searchtype=author&amp;query=Shi%2C+Y&amp;start=200" class="pagination-link " aria-label="Page 5" aria-current="page">5 </a> </li> <li><span class="pagination-ellipsis">&hellip;</span></li> </ul> </nav> <ol class="breathe-horizontal" start="1"> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2411.13766">arXiv:2411.13766</a> <span>&nbsp;[<a 
href="https://arxiv.org/pdf/2411.13766">pdf</a>, <a href="https://arxiv.org/format/2411.13766">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Sound">cs.SD</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Audio and Speech Processing">eess.AS</span> </div> </div> <p class="title is-5 mathjax"> Tiny-Align: Bridging Automatic Speech Recognition and Large Language Model on the Edge </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&amp;query=Qin%2C+R">Ruiyang Qin</a>, <a href="/search/eess?searchtype=author&amp;query=Liu%2C+D">Dancheng Liu</a>, <a href="/search/eess?searchtype=author&amp;query=Xu%2C+G">Gelei Xu</a>, <a href="/search/eess?searchtype=author&amp;query=Yan%2C+Z">Zheyu Yan</a>, <a href="/search/eess?searchtype=author&amp;query=Xu%2C+C">Chenhui Xu</a>, <a href="/search/eess?searchtype=author&amp;query=Hu%2C+Y">Yuting Hu</a>, <a href="/search/eess?searchtype=author&amp;query=Hu%2C+X+S">X. Sharon Hu</a>, <a href="/search/eess?searchtype=author&amp;query=Xiong%2C+J">Jinjun Xiong</a>, <a href="/search/eess?searchtype=author&amp;query=Shi%2C+Y">Yiyu Shi</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2411.13766v1-abstract-short" style="display: inline;"> The combination of Large Language Models (LLM) and Automatic Speech Recognition (ASR), when deployed on edge devices (called edge ASR-LLM), can serve as a powerful personalized assistant to enable audio-based interaction for users. Compared to text-based interaction, edge ASR-LLM allows accessible and natural audio interactions. 
Unfortunately, existing ASR-LLM models are mainly trained in high-per&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.13766v1-abstract-full').style.display = 'inline'; document.getElementById('2411.13766v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2411.13766v1-abstract-full" style="display: none;"> The combination of Large Language Models (LLM) and Automatic Speech Recognition (ASR), when deployed on edge devices (called edge ASR-LLM), can serve as a powerful personalized assistant to enable audio-based interaction for users. Compared to text-based interaction, edge ASR-LLM allows accessible and natural audio interactions. Unfortunately, existing ASR-LLM models are mainly trained in high-performance computing environments and produce substantial model weights, making them difficult to deploy on edge devices. More importantly, to better serve users&#39; personalized needs, the ASR-LLM must be able to learn from each distinct user, given that audio input often contains highly personalized characteristics that necessitate personalized on-device training. Since individually fine-tuning the ASR or LLM often leads to suboptimal results due to modality-specific limitations, end-to-end training ensures seamless integration of audio features and language understanding (cross-modal alignment), ultimately enabling a more personalized and efficient adaptation on edge devices. However, due to the complex training requirements and substantial computational demands of existing approaches, cross-modal alignment between ASR audio and LLM can be challenging on edge devices. In this work, we propose a resource-efficient cross-modal alignment framework that bridges ASR and LLMs on edge devices to handle personalized audio input. 
Our framework enables efficient ASR-LLM alignment on resource-constrained devices like NVIDIA Jetson Orin (8GB RAM), achieving 50x training time speedup while improving the alignment quality by more than 50\%. To the best of our knowledge, this is the first work to study efficient ASR-LLM alignment on resource-constrained edge devices. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.13766v1-abstract-full').style.display = 'none'; document.getElementById('2411.13766v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 20 November, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> November 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">7 pages, 8 figures</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2411.13560">arXiv:2411.13560</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2411.13560">pdf</a>, <a href="https://arxiv.org/format/2411.13560">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Hardware Architecture">cs.AR</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Emerging Technologies">cs.ET</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Signal Processing">eess.SP</span> </div> </div> <p class="title is-5 mathjax"> AMSnet-KG: A Netlist Dataset for LLM-based AMS Circuit Auto-Design Using Knowledge Graph RAG </p> <p class="authors"> <span class="search-hit">Authors:</span> <a 
href="/search/eess?searchtype=author&amp;query=Shi%2C+Y">Yichen Shi</a>, <a href="/search/eess?searchtype=author&amp;query=Tao%2C+Z">Zhuofu Tao</a>, <a href="/search/eess?searchtype=author&amp;query=Gao%2C+Y">Yuhao Gao</a>, <a href="/search/eess?searchtype=author&amp;query=Zhou%2C+T">Tianjia Zhou</a>, <a href="/search/eess?searchtype=author&amp;query=Chang%2C+C">Cheng Chang</a>, <a href="/search/eess?searchtype=author&amp;query=Wang%2C+Y">Yaxing Wang</a>, <a href="/search/eess?searchtype=author&amp;query=Chen%2C+B">Bingyu Chen</a>, <a href="/search/eess?searchtype=author&amp;query=Zhang%2C+G">Genhao Zhang</a>, <a href="/search/eess?searchtype=author&amp;query=Liu%2C+A">Alvin Liu</a>, <a href="/search/eess?searchtype=author&amp;query=Yu%2C+Z">Zhiping Yu</a>, <a href="/search/eess?searchtype=author&amp;query=Lin%2C+T">Ting-Jung Lin</a>, <a href="/search/eess?searchtype=author&amp;query=He%2C+L">Lei He</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2411.13560v1-abstract-short" style="display: inline;"> High-performance analog and mixed-signal (AMS) circuits are mainly full-custom designed, which is time-consuming and labor-intensive. A significant portion of the effort is experience-driven, which makes the automation of AMS circuit design a formidable challenge. 
Large language models (LLMs) have emerged as powerful tools for Electronic Design Automation (EDA) applications, fostering advancements&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.13560v1-abstract-full').style.display = 'inline'; document.getElementById('2411.13560v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2411.13560v1-abstract-full" style="display: none;"> High-performance analog and mixed-signal (AMS) circuits are mainly full-custom designed, which is time-consuming and labor-intensive. A significant portion of the effort is experience-driven, which makes the automation of AMS circuit design a formidable challenge. Large language models (LLMs) have emerged as powerful tools for Electronic Design Automation (EDA) applications, fostering advancements in the automatic design process for large-scale AMS circuits. However, the absence of high-quality datasets has led to issues such as model hallucination, which undermines the robustness of automatically generated circuit designs. To address this issue, this paper introduces AMSnet-KG, a dataset encompassing various AMS circuit schematics and netlists. We construct a knowledge graph with annotations on detailed functional and performance characteristics. Facilitated by AMSnet-KG, we propose an automated AMS circuit generation framework that utilizes the comprehensive knowledge embedded in LLMs. We first formulate a design strategy (e.g., circuit architecture using a number of circuit components) based on required specifications. Next, matched circuit components are retrieved and assembled into a complete topology, and transistor sizing is obtained through Bayesian optimization. Simulation results of the netlist are fed back to the LLM for further topology refinement, ensuring the circuit design specifications are met. 
We perform case studies of operational amplifier and comparator design to verify the automatic design flow from specifications to netlists with minimal human effort. The dataset used in this paper will be open-sourced upon publishing of this paper. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.13560v1-abstract-full').style.display = 'none'; document.getElementById('2411.13560v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 6 November, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> November 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2411.12547">arXiv:2411.12547</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2411.12547">pdf</a>, <a href="https://arxiv.org/format/2411.12547">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Image and Video Processing">eess.IV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> S3TU-Net: Structured Convolution and Superpixel Transformer for Lung Nodule Segmentation </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&amp;query=Wu%2C+Y">Yuke Wu</a>, <a href="/search/eess?searchtype=author&amp;query=Liu%2C+X">Xiang Liu</a>, <a href="/search/eess?searchtype=author&amp;query=Shi%2C+Y">Yunyu Shi</a>, <a href="/search/eess?searchtype=author&amp;query=Chen%2C+X">Xinyi Chen</a>, <a href="/search/eess?searchtype=author&amp;query=Wang%2C+Z">Zhenglei Wang</a>, <a 
href="/search/eess?searchtype=author&amp;query=Xu%2C+Y">YuQing Xu</a>, <a href="/search/eess?searchtype=author&amp;query=Wang%2C+S+H">Shuo Hong Wang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2411.12547v1-abstract-short" style="display: inline;"> The irregular and challenging characteristics of lung adenocarcinoma nodules in computed tomography (CT) images complicate staging diagnosis, making accurate segmentation critical for clinicians to extract detailed lesion information. In this study, we propose a segmentation model, S3TU-Net, which integrates multi-dimensional spatial connectors and a superpixel-based visual transformer. S3TU-Net i&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.12547v1-abstract-full').style.display = 'inline'; document.getElementById('2411.12547v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2411.12547v1-abstract-full" style="display: none;"> The irregular and challenging characteristics of lung adenocarcinoma nodules in computed tomography (CT) images complicate staging diagnosis, making accurate segmentation critical for clinicians to extract detailed lesion information. In this study, we propose a segmentation model, S3TU-Net, which integrates multi-dimensional spatial connectors and a superpixel-based visual transformer. S3TU-Net is built on a multi-view CNN-Transformer hybrid architecture, incorporating superpixel algorithms, structured weighting, and spatial shifting techniques to achieve superior segmentation performance. The model leverages structured convolution blocks (DWF-Conv/D2BR-Conv) to extract multi-scale local features while mitigating overfitting. 
To enhance multi-scale feature fusion, we introduce the S2-MLP Link, integrating spatial shifting and attention mechanisms at the skip connections. Additionally, the residual-based superpixel visual transformer (RM-SViT) effectively merges global and local features by employing sparse correlation learning and multi-branch attention to capture long-range dependencies, with residual connections enhancing stability and computational efficiency. Experimental results on the LIDC-IDRI dataset demonstrate that S3TU-Net achieves a DSC, precision, and IoU of 89.04%, 90.73%, and 90.70%, respectively. Compared to recent methods, S3TU-Net improves DSC by 4.52% and sensitivity by 3.16%, with other metrics showing an approximate 2% increase. In addition to comparison and ablation studies, we validated the generalization ability of our model on the EPDB private dataset, achieving a DSC of 86.40%. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.12547v1-abstract-full').style.display = 'none'; document.getElementById('2411.12547v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 19 November, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> November 2024. 
</p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2411.11282">arXiv:2411.11282</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2411.11282">pdf</a>, <a href="https://arxiv.org/format/2411.11282">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Image and Video Processing">eess.IV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> Continuous K-space Recovery Network with Image Guidance for Fast MRI Reconstruction </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&amp;query=Meng%2C+Y">Yucong Meng</a>, <a href="/search/eess?searchtype=author&amp;query=Yang%2C+Z">Zhiwei Yang</a>, <a href="/search/eess?searchtype=author&amp;query=Duan%2C+M">Minghong Duan</a>, <a href="/search/eess?searchtype=author&amp;query=Shi%2C+Y">Yonghong Shi</a>, <a href="/search/eess?searchtype=author&amp;query=Song%2C+Z">Zhijian Song</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2411.11282v1-abstract-short" style="display: inline;"> Magnetic resonance imaging (MRI) is a crucial tool for clinical diagnosis while facing the challenge of long scanning time. To reduce the acquisition time, fast MRI reconstruction aims to restore high-quality images from the undersampled k-space. Existing methods typically train deep learning models to map the undersampled data to artifact-free MRI images. 
However, these studies often overlook the&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.11282v1-abstract-full').style.display = 'inline'; document.getElementById('2411.11282v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2411.11282v1-abstract-full" style="display: none;"> Magnetic resonance imaging (MRI) is a crucial tool for clinical diagnosis while facing the challenge of long scanning time. To reduce the acquisition time, fast MRI reconstruction aims to restore high-quality images from the undersampled k-space. Existing methods typically train deep learning models to map the undersampled data to artifact-free MRI images. However, these studies often overlook the unique properties of k-space and directly apply general networks designed for image processing to k-space recovery, leaving the precise learning of k-space largely underexplored. In this work, we propose a continuous k-space recovery network from a new perspective of implicit neural representation with image domain guidance, which boosts the performance of MRI reconstruction. Specifically, (1) an implicit neural representation based encoder-decoder structure is customized to continuously query unsampled k-values. (2) an image guidance module is designed to mine the semantic information from the low-quality MRI images to further guide the k-space recovery. (3) a multi-stage training strategy is proposed to recover dense k-space progressively. Extensive experiments conducted on CC359, fastMRI, and IXI datasets demonstrate the effectiveness of our method and its superiority over other competitors. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.11282v1-abstract-full').style.display = 'none'; document.getElementById('2411.11282v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 17 November, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> November 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2411.11232">arXiv:2411.11232</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2411.11232">pdf</a>, <a href="https://arxiv.org/format/2411.11232">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Sound">cs.SD</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Audio and Speech Processing">eess.AS</span> </div> </div> <p class="title is-5 mathjax"> SAMOS: A Neural MOS Prediction Model Leveraging Semantic Representations and Acoustic Features </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&amp;query=Shi%2C+Y">Yu-Fei Shi</a>, <a href="/search/eess?searchtype=author&amp;query=Ai%2C+Y">Yang Ai</a>, <a href="/search/eess?searchtype=author&amp;query=Lu%2C+Y">Ye-Xin Lu</a>, <a href="/search/eess?searchtype=author&amp;query=Du%2C+H">Hui-Peng Du</a>, <a href="/search/eess?searchtype=author&amp;query=Ling%2C+Z">Zhen-Hua Ling</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2411.11232v1-abstract-short" style="display: inline;"> Assessing the naturalness of speech using mean opinion score (MOS) prediction models has positive implications for the automatic evaluation of speech synthesis systems. 
Early MOS prediction models took the raw waveform or amplitude spectrum of speech as input, whereas more advanced methods employed self-supervised-learning (SSL) based models to extract semantic representations from speech for MOS&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.11232v1-abstract-full').style.display = 'inline'; document.getElementById('2411.11232v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2411.11232v1-abstract-full" style="display: none;"> Assessing the naturalness of speech using mean opinion score (MOS) prediction models has positive implications for the automatic evaluation of speech synthesis systems. Early MOS prediction models took the raw waveform or amplitude spectrum of speech as input, whereas more advanced methods employed self-supervised-learning (SSL) based models to extract semantic representations from speech for MOS prediction. These methods utilized limited aspects of speech information for MOS prediction, resulting in restricted prediction accuracy. Therefore, in this paper, we propose SAMOS, a MOS prediction model that leverages both Semantic and Acoustic information of speech to be assessed. Specifically, the proposed SAMOS leverages a pretrained wav2vec2 to extract semantic representations and uses the feature extractor of a pretrained BiVocoder to extract acoustic features. These two types of features are then fed into the prediction network, which includes multi-task heads and an aggregation layer, to obtain the final MOS score. Experimental results demonstrate that the proposed SAMOS outperforms current state-of-the-art MOS prediction models on the BVCC dataset and performs comparable performance on the BC2019 dataset, according to the results of system-level evaluation metrics. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.11232v1-abstract-full').style.display = 'none'; document.getElementById('2411.11232v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 17 November, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> November 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2411.11123">arXiv:2411.11123</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2411.11123">pdf</a>, <a href="https://arxiv.org/format/2411.11123">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Sound">cs.SD</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Audio and Speech Processing">eess.AS</span> </div> </div> <p class="title is-5 mathjax"> Pitch-and-Spectrum-Aware Singing Quality Assessment with Bias Correction and Model Fusion </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&amp;query=Shi%2C+Y">Yu-Fei Shi</a>, <a href="/search/eess?searchtype=author&amp;query=Ai%2C+Y">Yang Ai</a>, <a href="/search/eess?searchtype=author&amp;query=Lu%2C+Y">Ye-Xin Lu</a>, <a href="/search/eess?searchtype=author&amp;query=Du%2C+H">Hui-Peng Du</a>, <a href="/search/eess?searchtype=author&amp;query=Ling%2C+Z">Zhen-Hua Ling</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2411.11123v1-abstract-short" style="display: inline;"> We participated in track 2 of the VoiceMOS Challenge 2024, which aimed to predict the mean opinion score (MOS) of singing samples. 
Our submission secured the first place among all participating teams, excluding the official baseline. In this paper, we further improve our submission and propose a novel Pitch-and-Spectrum-aware Singing Quality Assessment (PS-SQA) method. The PS-SQA is designed based&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.11123v1-abstract-full').style.display = 'inline'; document.getElementById('2411.11123v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2411.11123v1-abstract-full" style="display: none;"> We participated in track 2 of the VoiceMOS Challenge 2024, which aimed to predict the mean opinion score (MOS) of singing samples. Our submission secured the first place among all participating teams, excluding the official baseline. In this paper, we further improve our submission and propose a novel Pitch-and-Spectrum-aware Singing Quality Assessment (PS-SQA) method. The PS-SQA is designed based on the self-supervised-learning (SSL) MOS predictor, incorporating singing pitch and spectral information, which are extracted using pitch histogram and non-quantized neural codec, respectively. Additionally, the PS-SQA introduces a bias correction strategy to address prediction biases caused by low-resource training samples, and employs model fusion technology to further enhance prediction accuracy. Experimental results confirm that our proposed PS-SQA significantly outperforms all competing systems across all system-level metrics, confirming its strong singing quality assessment capabilities. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.11123v1-abstract-full').style.display = 'none'; document.getElementById('2411.11123v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 17 November, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> November 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2411.07484">arXiv:2411.07484</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2411.07484">pdf</a>, <a href="https://arxiv.org/format/2411.07484">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Systems and Control">eess.SY</span> </div> </div> <p class="title is-5 mathjax"> Convergence Guarantees for Differentiable Optimization-based Control Policy </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&amp;query=Bian%2C+Y">Yuexin Bian</a>, <a href="/search/eess?searchtype=author&amp;query=Feng%2C+J">Jie Feng</a>, <a href="/search/eess?searchtype=author&amp;query=Shi%2C+Y">Yuanyuan Shi</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2411.07484v1-abstract-short" style="display: inline;"> Effective control of real-world systems necessitates the development of controllers that are not only performant but also interpretable. To this end, the field has seen a surge in model-based control policies, which first leverage historical data to learn system cost and dynamics, and then utilize the learned models for control. 
However, due to this decoupling, model-based control policies fall sh&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.07484v1-abstract-full').style.display = 'inline'; document.getElementById('2411.07484v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2411.07484v1-abstract-full" style="display: none;"> Effective control of real-world systems necessitates the development of controllers that are not only performant but also interpretable. To this end, the field has seen a surge in model-based control policies, which first leverage historical data to learn system cost and dynamics, and then utilize the learned models for control. However, due to this decoupling, model-based control policies fall short when deployed in optimal control settings and lack convergence guarantees for achieving optimality. In this paper, we present DiffOP, a Differentiable Optimization-based Policy for optimal control. In the proposed framework, control actions are derived by solving an optimization, where the control cost and system&#39;s dynamics can be parameterized as neural networks. The key idea of DiffOP, inspired by differentiable optimization techniques, is to jointly learn the control policy using both policy gradients and optimization gradients, while utilizing actual cost feedback during system interaction. Further, this study presents the first theoretical analysis of the convergence rates and sample complexity for learning the optimization control policy with a policy gradient approach. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.07484v1-abstract-full').style.display = 'none'; document.getElementById('2411.07484v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 11 November, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> November 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2410.23592">arXiv:2410.23592</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2410.23592">pdf</a>, <a href="https://arxiv.org/format/2410.23592">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Systems and Control">eess.SY</span> </div> </div> <p class="title is-5 mathjax"> Adaptive Distributed Observer-based Model Predictive Control for Multi-agent Formation with Resilience to Communication Link Faults </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&amp;query=Xu%2C+B">Binyan Xu</a>, <a href="/search/eess?searchtype=author&amp;query=Dai%2C+Y">Yufan Dai</a>, <a href="/search/eess?searchtype=author&amp;query=Suleman%2C+A">Afzal Suleman</a>, <a href="/search/eess?searchtype=author&amp;query=Shi%2C+Y">Yang Shi</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2410.23592v1-abstract-short" style="display: inline;"> In order to address the nonlinear multi-agent formation tracking control problem with input constraints and unknown communication faults, a novel adaptive distributed observer-based distributed model predictive control method is developed in this paper. 
This design employs adaptive distributed observers in local control systems to estimate the leader&#39;s state, dynamics, and relative positioning wit&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.23592v1-abstract-full').style.display = 'inline'; document.getElementById('2410.23592v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2410.23592v1-abstract-full" style="display: none;"> In order to address the nonlinear multi-agent formation tracking control problem with input constraints and unknown communication faults, a novel adaptive distributed observer-based distributed model predictive control method is developed in this paper. This design employs adaptive distributed observers in local control systems to estimate the leader&#39;s state, dynamics, and relative positioning with respect to the leader. Utilizing the estimated data as local references, the original formation tracking control problem can be decomposed into several fully localized tracking control problems, which can be efficiently solved by the local predictive controller. Through the incorporation of adaptive distributed observers, this proposed design not only enhances the resilience of distributed formation tracking against communication faults but also simplifies the distributed model predictive control formulation. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.23592v1-abstract-full').style.display = 'none'; document.getElementById('2410.23592v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 30 October, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2024. 
</p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2410.23073">arXiv:2410.23073</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2410.23073">pdf</a>, <a href="https://arxiv.org/format/2410.23073">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Image and Video Processing">eess.IV</span> </div> </div> <p class="title is-5 mathjax"> RSNet: A Light Framework for The Detection of Multi-scale Remote Sensing Targets </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&amp;query=Chen%2C+H">Hongyu Chen</a>, <a href="/search/eess?searchtype=author&amp;query=Chen%2C+C">Chengcheng Chen</a>, <a href="/search/eess?searchtype=author&amp;query=Wang%2C+F">Fei Wang</a>, <a href="/search/eess?searchtype=author&amp;query=Shi%2C+Y">Yuhu Shi</a>, <a href="/search/eess?searchtype=author&amp;query=Zeng%2C+W">Weiming Zeng</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2410.23073v3-abstract-short" style="display: inline;"> Recent advancements in synthetic aperture radar (SAR) ship detection using deep learning have significantly improved accuracy and speed, yet effectively detecting small objects in complex backgrounds with fewer parameters remains a challenge. This letter introduces RSNet, a lightweight framework constructed to enhance ship detection in SAR imagery. 
To ensure accuracy with fewer parameters, we prop&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.23073v3-abstract-full').style.display = 'inline'; document.getElementById('2410.23073v3-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2410.23073v3-abstract-full" style="display: none;"> Recent advancements in synthetic aperture radar (SAR) ship detection using deep learning have significantly improved accuracy and speed, yet effectively detecting small objects in complex backgrounds with fewer parameters remains a challenge. This letter introduces RSNet, a lightweight framework constructed to enhance ship detection in SAR imagery. To ensure accuracy with fewer parameters, we proposed Waveletpool-ContextGuided (WCG) as its backbone, guiding global context understanding through multi-scale wavelet features for effective detection in complex scenes. Additionally, Waveletpool-StarFusion (WSF) is introduced as the neck, employing a residual wavelet element-wise multiplication structure to achieve higher dimensional nonlinear features without increasing network width. The Lightweight-Shared (LS) module is designed as detect components to achieve efficient detection through lightweight shared convolutional structure and multi-format compatibility. Experiments on the SAR Ship Detection Dataset (SSDD) and High-Resolution SAR Image Dataset (HRSID) demonstrate that RSNet achieves a strong balance between lightweight design and detection performance, surpassing many state-of-the-art detectors, reaching 72.5\% and 67.6\% in \textbf{\(\mathbf{mAP_{.50:.95}}\) }respectively with 1.49M parameters. Our code will be released soon. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.23073v3-abstract-full').style.display = 'none'; document.getElementById('2410.23073v3-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 9 November, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 30 October, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2410.22057">arXiv:2410.22057</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2410.22057">pdf</a>, <a href="https://arxiv.org/format/2410.22057">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Image and Video Processing">eess.IV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> FANCL: Feature-Guided Attention Network with Curriculum Learning for Brain Metastases Segmentation </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&amp;query=Liu%2C+Z">Zijiang Liu</a>, <a href="/search/eess?searchtype=author&amp;query=Liu%2C+X">Xiaoyu Liu</a>, <a href="/search/eess?searchtype=author&amp;query=Qu%2C+L">Linhao Qu</a>, <a href="/search/eess?searchtype=author&amp;query=Shi%2C+Y">Yonghong Shi</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2410.22057v1-abstract-short" style="display: inline;"> Accurate segmentation of brain metastases (BMs) in MR image is crucial for the diagnosis and follow-up of patients. 
Methods based on deep convolutional neural networks (CNNs) have achieved high segmentation performance. However, due to the loss of critical feature information caused by convolutional and pooling operations, CNNs still face great challenges in small BMs segmentation. Besides, BMs ar&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.22057v1-abstract-full').style.display = 'inline'; document.getElementById('2410.22057v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2410.22057v1-abstract-full" style="display: none;"> Accurate segmentation of brain metastases (BMs) in MR image is crucial for the diagnosis and follow-up of patients. Methods based on deep convolutional neural networks (CNNs) have achieved high segmentation performance. However, due to the loss of critical feature information caused by convolutional and pooling operations, CNNs still face great challenges in small BMs segmentation. Besides, BMs are irregular and easily confused with healthy tissues, which makes it difficult for the model to effectively learn tumor structure during training. To address these issues, this paper proposes a novel model called feature-guided attention network with curriculum learning (FANCL). Based on CNNs, FANCL utilizes the input image and its feature to establish the intrinsic connections between metastases of different sizes, which can effectively compensate for the loss of high-level feature from small tumors with the information of large tumors. Furthermore, FANCL applies the voxel-level curriculum learning strategy to help the model gradually learn the structure and details of BMs. And baseline models of varying depths are employed as curriculum-mining networks for organizing the curriculum progression. 
The evaluation results on the BraTS-METS 2023 dataset indicate that FANCL significantly improves the segmentation performance, confirming the effectiveness of our method. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.22057v1-abstract-full').style.display = 'none'; document.getElementById('2410.22057v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 29 October, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2410.19432">arXiv:2410.19432</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2410.19432">pdf</a>, <a href="https://arxiv.org/format/2410.19432">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Robotics">cs.RO</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Systems and Control">eess.SY</span> </div> </div> <p class="title is-5 mathjax"> Image-Based Visual Servoing for Enhanced Cooperation of Dual-Arm Manipulation </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&amp;query=Zhang%2C+Z">Zizhe Zhang</a>, <a href="/search/eess?searchtype=author&amp;query=Yang%2C+Y">Yuan Yang</a>, <a href="/search/eess?searchtype=author&amp;query=Zuo%2C+W">Wenqiang Zuo</a>, <a href="/search/eess?searchtype=author&amp;query=Song%2C+G">Guangming Song</a>, <a href="/search/eess?searchtype=author&amp;query=Song%2C+A">Aiguo Song</a>, <a href="/search/eess?searchtype=author&amp;query=Shi%2C+Y">Yang Shi</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" 
id="2410.19432v2-abstract-short" style="display: inline;"> The cooperation of a pair of robot manipulators is required to manipulate a target object without any fixtures. The conventional control methods coordinate the end-effector pose of each manipulator with that of the other using their kinematics and joint coordinate measurements. Yet, the manipulators&#39; inaccurate kinematics and joint coordinate measurements can cause significant pose synchronization&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.19432v2-abstract-full').style.display = 'inline'; document.getElementById('2410.19432v2-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2410.19432v2-abstract-full" style="display: none;"> The cooperation of a pair of robot manipulators is required to manipulate a target object without any fixtures. The conventional control methods coordinate the end-effector pose of each manipulator with that of the other using their kinematics and joint coordinate measurements. Yet, the manipulators&#39; inaccurate kinematics and joint coordinate measurements can cause significant pose synchronization errors in practice. This paper thus proposes an image-based visual servoing approach for enhancing the cooperation of a dual-arm manipulation system. On top of the classical control, the visual servoing controller lets each manipulator use its carried camera to measure the image features of the other&#39;s marker and adapt its end-effector pose with the counterpart on the move. Because visual measurements are robust to kinematic errors, the proposed control can reduce the end-effector pose synchronization errors and the fluctuations of the interaction forces of the pair of manipulators on the move. Theoretical analyses have rigorously proven the stability of the closed-loop system. 
Comparative experiments on real robots have substantiated the effectiveness of the proposed control. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.19432v2-abstract-full').style.display = 'none'; document.getElementById('2410.19432v2-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 27 October, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 25 October, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">8 pages, 7 figures. Corresponding author: Yuan Yang (yuan_evan_yang@seu.edu.cn). For associated video file, see https://zizhe.io/assets/d16d4124b851e10a9db1775ed4a4ece9.mp4 This work has been submitted to the IEEE for possible publication</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2410.07277">arXiv:2410.07277</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2410.07277">pdf</a>, <a href="https://arxiv.org/format/2410.07277">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Audio and Speech Processing">eess.AS</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Sound">cs.SD</span> </div> </div> <p class="title is-5 mathjax"> Swin-BERT: A Feature Fusion System designed for Speech-based Alzheimer&#39;s Dementia Detection </p> <p class="authors"> 
<span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&amp;query=Pan%2C+Y">Yilin Pan</a>, <a href="/search/eess?searchtype=author&amp;query=Shi%2C+Y">Yanpei Shi</a>, <a href="/search/eess?searchtype=author&amp;query=Zhang%2C+Y">Yijia Zhang</a>, <a href="/search/eess?searchtype=author&amp;query=Lu%2C+M">Mingyu Lu</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2410.07277v1-abstract-short" style="display: inline;"> Speech is usually used for constructing an automatic Alzheimer&#39;s dementia (AD) detection system, as the acoustic and linguistic abilities show a decline in people living with AD at the early stages. However, speech includes not only AD-related local and global information but also other information unrelated to cognitive status, such as age and gender. In this paper, we propose a speech-based syst&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.07277v1-abstract-full').style.display = 'inline'; document.getElementById('2410.07277v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2410.07277v1-abstract-full" style="display: none;"> Speech is usually used for constructing an automatic Alzheimer&#39;s dementia (AD) detection system, as the acoustic and linguistic abilities show a decline in people living with AD at the early stages. However, speech includes not only AD-related local and global information but also other information unrelated to cognitive status, such as age and gender. In this paper, we propose a speech-based system named Swin-BERT for automatic dementia detection. For the acoustic part, the shifted windows multi-head attention that proposed to extract local and global information from images, is used for designing our acoustic-based system. 
To decouple the effect of age and gender on acoustic feature extraction, they are used as an extra input of the designed acoustic system. For the linguistic part, the rhythm-related information, which varies significantly between people living with and without AD, is removed while transcribing the audio recordings into transcripts. To compensate for the removed rhythm-related information, the character-level transcripts are proposed to be used as the extra input of a word-level BERT-style system. Finally, the Swin-BERT combines the acoustic features learned from our proposed acoustic-based system with our linguistic-based system. The experiments are based on the two datasets provided by the international dementia detection challenges: the ADReSS and ADReSSo. The results show that both the proposed acoustic and linguistic systems can be better or comparable with previous research on the two datasets. Superior results are achieved by the proposed Swin-BERT system on the ADReSS and ADReSSo datasets, which are 85.58\% F-score and 87.32\% F-score respectively. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.07277v1-abstract-full').style.display = 'none'; document.getElementById('2410.07277v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 9 October, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2024. 
</p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2410.05883">arXiv:2410.05883</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2410.05883">pdf</a>, <a href="https://arxiv.org/format/2410.05883">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Signal Processing">eess.SP</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Optimization and Control">math.OC</span> </div> </div> <p class="title is-5 mathjax"> Improved PCRLB for radar tracking in clutter with geometry-dependent target measurement uncertainty and application to radar trajectory control </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&amp;query=Shi%2C+Y">Yifang Shi</a>, <a href="/search/eess?searchtype=author&amp;query=Zhang%2C+Y">Yu Zhang</a>, <a href="/search/eess?searchtype=author&amp;query=Fu%2C+L">Linjiao Fu</a>, <a href="/search/eess?searchtype=author&amp;query=Peng%2C+D">Dongliang Peng</a>, <a href="/search/eess?searchtype=author&amp;query=Lu%2C+Q">Qiang Lu</a>, <a href="/search/eess?searchtype=author&amp;query=Choi%2C+J+W">Jee Woong Choi</a>, <a href="/search/eess?searchtype=author&amp;query=Farina%2C+A">Alfonso Farina</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2410.05883v1-abstract-short" style="display: inline;"> In realistic radar tracking, target measurement uncertainty (TMU) in terms of both detection probability and measurement error covariance is significantly affected by the target-to-radar (T2R) geometry. 
However, existing posterior Cramer-Rao Lower Bounds (PCRLBs) rarely investigate the fundamental impact of T2R geometry on target measurement uncertainty and eventually on mean square error (MSE) of&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.05883v1-abstract-full').style.display = 'inline'; document.getElementById('2410.05883v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2410.05883v1-abstract-full" style="display: none;"> In realistic radar tracking, target measurement uncertainty (TMU) in terms of both detection probability and measurement error covariance is significantly affected by the target-to-radar (T2R) geometry. However, existing posterior Cramer-Rao Lower Bounds (PCRLBs) rarely investigate the fundamental impact of T2R geometry on target measurement uncertainty and eventually on mean square error (MSE) of state estimate, inevitably resulting in over-conservative lower bound. To address this issue, this paper firstly derives the generalized model of target measurement error covariance for bistatic radar with moving receiver and transmitter illuminating any type of signal, along with its approximated solution to specify the impact of T2R geometry on error covariance. Based upon formulated TMU model, an improved PCRLB (IPCRLB) fully accounting for both measurement origin uncertainty and geometry-dependent TMU is then re-derived, both detection probability and measurement error covariance are treated as state-dependent parameters when differentiating log-likelihood with respect to target state. Compared to existing PCRLBs that partially or completely ignore the dependence of target measurement uncertainty on T2R geometry, proposed IPCRLB provides a much more accurate (less-conservative) lower bound for radar tracking in clutter with geometry-dependent TMU. 
The new bound is then applied to radar trajectory control to effectively optimize T2R geometry and exhibits the least uncertainty of acquired target measurement and more accurate state estimate for bistatic radar tracking in clutter, compared to state-of-the-art trajectory control methods. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.05883v1-abstract-full').style.display = 'none'; document.getElementById('2410.05883v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 8 October, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">15 pages, 12 figures</span> </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">ACM Class:</span> F.2.1 </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2410.05115">arXiv:2410.05115</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2410.05115">pdf</a>, <a href="https://arxiv.org/format/2410.05115">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Quantum Physics">quant-ph</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Systems and Control">eess.SY</span> </div> </div> <p class="title is-5 mathjax"> AlphaRouter: Quantum Circuit Routing with Reinforcement Learning and Tree Search </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&amp;query=Tang%2C+W">Wei Tang</a>, <a 
href="/search/eess?searchtype=author&amp;query=Duan%2C+Y">Yiheng Duan</a>, <a href="/search/eess?searchtype=author&amp;query=Kharkov%2C+Y">Yaroslav Kharkov</a>, <a href="/search/eess?searchtype=author&amp;query=Fakoor%2C+R">Rasool Fakoor</a>, <a href="/search/eess?searchtype=author&amp;query=Kessler%2C+E">Eric Kessler</a>, <a href="/search/eess?searchtype=author&amp;query=Shi%2C+Y">Yunong Shi</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2410.05115v1-abstract-short" style="display: inline;"> Quantum computers have the potential to outperform classical computers in important tasks such as optimization and number factoring. They are characterized by limited connectivity, which necessitates the routing of their computational bits, known as qubits, to specific locations during program execution to carry out quantum operations. Traditionally, the NP-hard optimization problem of minimizing&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.05115v1-abstract-full').style.display = 'inline'; document.getElementById('2410.05115v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2410.05115v1-abstract-full" style="display: none;"> Quantum computers have the potential to outperform classical computers in important tasks such as optimization and number factoring. They are characterized by limited connectivity, which necessitates the routing of their computational bits, known as qubits, to specific locations during program execution to carry out quantum operations. Traditionally, the NP-hard optimization problem of minimizing the routing overhead has been addressed through sub-optimal rule-based routing techniques with inherent human biases embedded within the cost function design. 
This paper introduces a solution that integrates Monte Carlo Tree Search (MCTS) with Reinforcement Learning (RL). Our RL-based router, called AlphaRouter, outperforms the current state-of-the-art routing methods and generates quantum programs with up to $20\%$ less routing overhead, thus significantly enhancing the overall efficiency and feasibility of quantum computing. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.05115v1-abstract-full').style.display = 'none'; document.getElementById('2410.05115v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 7 October, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">11 pages, 11 figures, International Conference on Quantum Computing and Engineering - QCE24</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2410.04017">arXiv:2410.04017</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2410.04017">pdf</a>, <a href="https://arxiv.org/format/2410.04017">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Audio and Speech Processing">eess.AS</span> </div> </div> <p class="title is-5 mathjax"> Adversarial Attacks and Robust Defenses in Speaker Embedding based Zero-Shot Text-to-Speech System </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&amp;query=Li%2C+Z">Ze Li</a>, <a href="/search/eess?searchtype=author&amp;query=Shi%2C+Y">Yao Shi</a>, <a href="/search/eess?searchtype=author&amp;query=Xu%2C+Y">Yunfei Xu</a>, <a 
href="/search/eess?searchtype=author&amp;query=Li%2C+M">Ming Li</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2410.04017v1-abstract-short" style="display: inline;"> Speaker embedding based zero-shot Text-to-Speech (TTS) systems enable high-quality speech synthesis for unseen speakers using minimal data. However, these systems are vulnerable to adversarial attacks, where an attacker introduces imperceptible perturbations to the original speaker&#39;s audio waveform, leading to synthesized speech sounds like another person. This vulnerability poses significant secu&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.04017v1-abstract-full').style.display = 'inline'; document.getElementById('2410.04017v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2410.04017v1-abstract-full" style="display: none;"> Speaker embedding based zero-shot Text-to-Speech (TTS) systems enable high-quality speech synthesis for unseen speakers using minimal data. However, these systems are vulnerable to adversarial attacks, where an attacker introduces imperceptible perturbations to the original speaker&#39;s audio waveform, leading to synthesized speech sounds like another person. This vulnerability poses significant security risks, including speaker identity spoofing and unauthorized voice manipulation. This paper investigates two primary defense strategies to address these threats: adversarial training and adversarial purification. Adversarial training enhances the model&#39;s robustness by integrating adversarial examples during the training process, thereby improving resistance to such attacks. Adversarial purification, on the other hand, employs diffusion probabilistic models to revert adversarially perturbed audio to its clean form. 
Experimental results demonstrate that these defense mechanisms can significantly reduce the impact of adversarial perturbations, enhancing the security and reliability of speaker embedding based zero-shot TTS systems in adversarial environments. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.04017v1-abstract-full').style.display = 'none'; document.getElementById('2410.04017v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 4 October, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2410.03559">arXiv:2410.03559</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2410.03559">pdf</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Signal Processing">eess.SP</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Neurons and Cognition">q-bio.NC</span> </div> </div> <p class="title is-5 mathjax"> Optimizing food taste sensory evaluation through neural network-based taste electroencephalogram channel selection </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&amp;query=Xia%2C+X">Xiuxin Xia</a>, <a href="/search/eess?searchtype=author&amp;query=Wang%2C+Q">Qun Wang</a>, <a href="/search/eess?searchtype=author&amp;query=Wang%2C+H">He Wang</a>, <a href="/search/eess?searchtype=author&amp;query=Liu%2C+C">Chenrui Liu</a>, <a 
href="/search/eess?searchtype=author&amp;query=Li%2C+P">Pengwei Li</a>, <a href="/search/eess?searchtype=author&amp;query=Shi%2C+Y">Yan Shi</a>, <a href="/search/eess?searchtype=author&amp;query=Men%2C+H">Hong Men</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2410.03559v1-abstract-short" style="display: inline;"> The taste electroencephalogram (EEG) evoked by the taste stimulation can reflect different brain patterns and be used in applications such as sensory evaluation of food. However, considering the computational cost and efficiency, EEG data with many channels has to face the critical issue of channel selection. This paper proposed a channel selection method called class activation mapping with atten&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.03559v1-abstract-full').style.display = 'inline'; document.getElementById('2410.03559v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2410.03559v1-abstract-full" style="display: none;"> The taste electroencephalogram (EEG) evoked by the taste stimulation can reflect different brain patterns and be used in applications such as sensory evaluation of food. However, considering the computational cost and efficiency, EEG data with many channels has to face the critical issue of channel selection. This paper proposed a channel selection method called class activation mapping with attention (CAM-Attention). The CAM-Attention method combined a convolutional neural network with channel and spatial attention (CNN-CSA) model with a gradient-weighted class activation mapping (Grad-CAM) model. The CNN-CSA model exploited key features in EEG data by attention mechanism, and the Grad-CAM model effectively realized the visualization of feature regions. 
Then, channel selection was effectively implemented based on feature regions. Finally, the CAM-Attention method reduced the computational burden of taste EEG recognition and effectively distinguished the four tastes. In short, it has excellent recognition performance and provides effective technical support for taste sensory evaluation. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.03559v1-abstract-full').style.display = 'none'; document.getElementById('2410.03559v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 18 September, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">33 pages, 13 figures</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2410.03143">arXiv:2410.03143</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2410.03143">pdf</a>, <a href="https://arxiv.org/format/2410.03143">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Image and Video Processing">eess.IV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> ECHOPulse: ECG controlled echocardio-grams video generation </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&amp;query=Li%2C+Y">Yiwei Li</a>, <a href="/search/eess?searchtype=author&amp;query=Kim%2C+S">Sekeun Kim</a>, <a 
href="/search/eess?searchtype=author&amp;query=Wu%2C+Z">Zihao Wu</a>, <a href="/search/eess?searchtype=author&amp;query=Jiang%2C+H">Hanqi Jiang</a>, <a href="/search/eess?searchtype=author&amp;query=Pan%2C+Y">Yi Pan</a>, <a href="/search/eess?searchtype=author&amp;query=Jin%2C+P">Pengfei Jin</a>, <a href="/search/eess?searchtype=author&amp;query=Song%2C+S">Sifan Song</a>, <a href="/search/eess?searchtype=author&amp;query=Shi%2C+Y">Yucheng Shi</a>, <a href="/search/eess?searchtype=author&amp;query=Liu%2C+T">Tianming Liu</a>, <a href="/search/eess?searchtype=author&amp;query=Li%2C+Q">Quanzheng Li</a>, <a href="/search/eess?searchtype=author&amp;query=Li%2C+X">Xiang Li</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2410.03143v2-abstract-short" style="display: inline;"> Echocardiography (ECHO) is essential for cardiac assessments, but its video quality and interpretation heavily relies on manual expertise, leading to inconsistent results from clinical and portable devices. ECHO video generation offers a solution by improving automated monitoring through synthetic data and generating high-quality videos from routine health data. However, existing models often face&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.03143v2-abstract-full').style.display = 'inline'; document.getElementById('2410.03143v2-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2410.03143v2-abstract-full" style="display: none;"> Echocardiography (ECHO) is essential for cardiac assessments, but its video quality and interpretation heavily relies on manual expertise, leading to inconsistent results from clinical and portable devices. 
ECHO video generation offers a solution by improving automated monitoring through synthetic data and generating high-quality videos from routine health data. However, existing models often face high computational costs, slow inference, and rely on complex conditional prompts that require experts&#39; annotations. To address these challenges, we propose ECHOPULSE, an ECG-conditioned ECHO video generation model. ECHOPULSE introduces two key advancements: (1) it accelerates ECHO video generation by leveraging VQ-VAE tokenization and masked visual token modeling for fast decoding, and (2) it conditions on readily accessible ECG signals, which are highly coherent with ECHO videos, bypassing complex conditional prompts. To the best of our knowledge, this is the first work to use time-series prompts like ECG signals for ECHO video generation. ECHOPULSE not only enables controllable synthetic ECHO data generation but also provides updated cardiac function information for disease monitoring and prediction beyond ECG alone. Evaluations on three public and private datasets demonstrate state-of-the-art performance in ECHO video generation across both qualitative and quantitative measures. Additionally, ECHOPULSE can be easily generalized to other modality generation tasks, such as cardiac MRI, fMRI, and 3D CT generation. Demo can be seen from \url{https://github.com/levyisthebest/ECHOPulse_Prelease}. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.03143v2-abstract-full').style.display = 'none'; document.getElementById('2410.03143v2-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 11 October, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 4 October, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2024. 
</p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2409.19223">arXiv:2409.19223</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2409.19223">pdf</a>, <a href="https://arxiv.org/format/2409.19223">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Signal Processing">eess.SP</span> </div> </div> <p class="title is-5 mathjax"> Summit Vitals: Multi-Camera and Multi-Signal Biosensing at High Altitudes </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&amp;query=Liu%2C+K">Ke Liu</a>, <a href="/search/eess?searchtype=author&amp;query=Tang%2C+J">Jiankai Tang</a>, <a href="/search/eess?searchtype=author&amp;query=Jiang%2C+Z">Zhang Jiang</a>, <a href="/search/eess?searchtype=author&amp;query=Wang%2C+Y">Yuntao Wang</a>, <a href="/search/eess?searchtype=author&amp;query=Liu%2C+X">Xiaojing Liu</a>, <a href="/search/eess?searchtype=author&amp;query=Li%2C+D">Dong Li</a>, <a href="/search/eess?searchtype=author&amp;query=Shi%2C+Y">Yuanchun Shi</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2409.19223v1-abstract-short" style="display: inline;"> Video photoplethysmography (vPPG) is an emerging method for non-invasive and convenient measurement of physiological signals, utilizing two primary approaches: remote video PPG (rPPG) and contact video PPG (cPPG). Monitoring vitals in high-altitude environments, where heart rates tend to increase and blood oxygen levels often decrease, presents significant challenges. 
To address these issues, we i&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2409.19223v1-abstract-full').style.display = 'inline'; document.getElementById('2409.19223v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2409.19223v1-abstract-full" style="display: none;"> Video photoplethysmography (vPPG) is an emerging method for non-invasive and convenient measurement of physiological signals, utilizing two primary approaches: remote video PPG (rPPG) and contact video PPG (cPPG). Monitoring vitals in high-altitude environments, where heart rates tend to increase and blood oxygen levels often decrease, presents significant challenges. To address these issues, we introduce the SUMS dataset comprising 80 synchronized non-contact facial and contact finger videos from 10 subjects during exercise and oxygen recovery scenarios, capturing PPG, respiration rate (RR), and SpO2. This dataset is designed to validate video vitals estimation algorithms and compare facial rPPG with finger cPPG. Additionally, fusing videos from different positions (i.e., face and finger) reduces the mean absolute error (MAE) of SpO2 predictions by 7.6\% and 10.6\% compared to only face and only finger, respectively. In cross-subject evaluation, we achieve an MAE of less than 0.5 BPM for HR estimation and 2.5\% for SpO2 estimation, demonstrating the precision of our multi-camera fusion techniques. Our findings suggest that simultaneous training on multiple indicators, such as PPG and blood oxygen, can reduce MAE in SpO2 estimation by 17.8\%. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2409.19223v1-abstract-full').style.display = 'none'; document.getElementById('2409.19223v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 27 September, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> September 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Accepted by UIC&#39;24, 8 pages, 5 figures. Ke Liu and Jiankai Tang are co-first authors. Yuntao Wang and Xiaojing Liu are co-corresponding authors</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2409.17569">arXiv:2409.17569</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2409.17569">pdf</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Image and Video Processing">eess.IV</span> </div> </div> <p class="title is-5 mathjax"> A novel brain registration model combining structural and functional MRI information </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&amp;query=Li%2C+B">Baolong Li</a>, <a href="/search/eess?searchtype=author&amp;query=Shi%2C+Y">Yuhu Shi</a>, <a href="/search/eess?searchtype=author&amp;query=Wang%2C+L">Lei Wang</a>, <a href="/search/eess?searchtype=author&amp;query=Zeng%2C+W">Weiming Zeng</a>, <a href="/search/eess?searchtype=author&amp;query=Zhu%2C+C">Changming Zhu</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2409.17569v1-abstract-short" style="display: inline;"> Although developed 
functional magnetic resonance imaging (fMRI) registration algorithms based on deep learning have achieved a certain degree of alignment of functional area, they underutilized fine structural information. In this paper, we propose a semi-supervised convolutional neural network (CNN) registration model that integrates both structural and functional MRI information. The model first&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2409.17569v1-abstract-full').style.display = 'inline'; document.getElementById('2409.17569v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2409.17569v1-abstract-full" style="display: none;"> Although developed functional magnetic resonance imaging (fMRI) registration algorithms based on deep learning have achieved a certain degree of alignment of functional area, they underutilized fine structural information. In this paper, we propose a semi-supervised convolutional neural network (CNN) registration model that integrates both structural and functional MRI information. The model first learns to generate deformation fields by inputting structural MRI (T1w-MRI) into the CNN to capture fine structural information. Then, we construct a local functional connectivity pattern to describe the local fMRI information, and use the Bhattacharyya coefficient to measure the similarity between two fMRI images, which is used as a loss function to facilitate the alignment of functional areas. In the inter-subject registration experiment, our model achieved an average number of voxels exceeding the threshold of 4.24 is 2248 in the group-level t-test maps for the four functional brain networks (default mode network, visual network, central executive network, and sensorimotor network). Additionally, the atlas-based registration experiment results show that the average number of voxels exceeding this threshold is 3620. 
The results are the largest among all methods. Our model achieves an excellent registration performance in fMRI and improves the consistency of functional regions. The proposed model has the potential to optimize fMRI image processing and analysis, facilitating the development of fMRI applications. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2409.17569v1-abstract-full').style.display = 'none'; document.getElementById('2409.17569v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 26 September, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> September 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2409.15708">arXiv:2409.15708</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2409.15708">pdf</a>, <a href="https://arxiv.org/format/2409.15708">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Systems and Control">eess.SY</span> </div> </div> <p class="title is-5 mathjax"> Open-/Closed-loop Active Learning for Data-driven Predictive Control </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&amp;query=Feng%2C+S">Shilun Feng</a>, <a href="/search/eess?searchtype=author&amp;query=Shi%2C+D">Dawei Shi</a>, <a href="/search/eess?searchtype=author&amp;query=Shi%2C+Y">Yang Shi</a>, <a href="/search/eess?searchtype=author&amp;query=Zheng%2C+K">Kaikai Zheng</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2409.15708v1-abstract-short" style="display: inline;"> An important question in data-driven control is how to obtain an 
informative dataset. In this work, we consider the problem of effective data acquisition of an unknown linear system with bounded disturbance for both open-loop and closed-loop stages. The learning objective is to minimize the volume of the set of admissible systems. First, a performance measure based on historical data and the input&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2409.15708v1-abstract-full').style.display = 'inline'; document.getElementById('2409.15708v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2409.15708v1-abstract-full" style="display: none;"> An important question in data-driven control is how to obtain an informative dataset. In this work, we consider the problem of effective data acquisition of an unknown linear system with bounded disturbance for both open-loop and closed-loop stages. The learning objective is to minimize the volume of the set of admissible systems. First, a performance measure based on historical data and the input sequence is introduced to characterize the upper bound of the volume of the set of admissible systems. On the basis of this performance measure, an open-loop active learning strategy is proposed to minimize the volume by actively designing inputs during the open-loop stage. For the closed-loop stage, an closed-loop active learning strategy is designed to select and learn from informative closed-loop data. The efficiency of the proposed closed-loop active learning strategy is proved by showing that the unselected data cannot benefit the learning performance. Furthermore, an adaptive predictive controller is designed in accordance with the proposed data acquisition approach. The recursive feasibility and the stability of the controller are proved by analyzing the effect of the closed-loop active learning strategy. 
Finally, numerical examples and comparisons illustrate the effectiveness of the proposed data acquisition strategy. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2409.15708v1-abstract-full').style.display = 'none'; document.getElementById('2409.15708v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 23 September, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> September 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2408.15436">arXiv:2408.15436</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2408.15436">pdf</a>, <a href="https://arxiv.org/format/2408.15436">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Systems and Control">eess.SY</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> </div> </div> <p class="title is-5 mathjax"> Online Event-Triggered Switching for Frequency Control in Power Grids with Variable Inertia </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&amp;query=Feng%2C+J">Jie Feng</a>, <a href="/search/eess?searchtype=author&amp;query=Cui%2C+W">Wenqi Cui</a>, <a href="/search/eess?searchtype=author&amp;query=Cort%C3%A9s%2C+J">Jorge Cortés</a>, <a href="/search/eess?searchtype=author&amp;query=Shi%2C+Y">Yuanyuan Shi</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2408.15436v1-abstract-short" style="display: inline;"> The increasing integration of renewable energy resources into power grids has led to time-varying system inertia and 
consequent degradation in frequency dynamics. A promising solution to alleviate performance degradation is using power electronics interfaced energy resources, such as renewable generators and battery energy storage for primary frequency control, by adjusting their power output set-&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2408.15436v1-abstract-full').style.display = 'inline'; document.getElementById('2408.15436v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2408.15436v1-abstract-full" style="display: none;"> The increasing integration of renewable energy resources into power grids has led to time-varying system inertia and consequent degradation in frequency dynamics. A promising solution to alleviate performance degradation is using power electronics interfaced energy resources, such as renewable generators and battery energy storage for primary frequency control, by adjusting their power output set-points in response to frequency deviations. However, designing a frequency controller under time-varying inertia is challenging. Specifically, the stability or optimality of controllers designed for time-invariant systems can be compromised once applied to a time-varying system. We model the frequency dynamics under time-varying inertia as a nonlinear switching system, where the frequency dynamics under each mode are described by the nonlinear swing equations and different modes represent different inertia levels. We identify a key controller structure, named Neural Proportional-Integral (Neural-PI) controller, that guarantees exponential input-to-state stability for each mode. To further improve performance, we present an online event-triggered switching algorithm to select the most suitable controller from a set of Neural-PI controllers, each optimized for specific inertia levels. 
Simulations on the IEEE 39-bus system validate the effectiveness of the proposed online switching control method with stability guarantees and optimized performance for frequency control under time-varying inertia. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2408.15436v1-abstract-full').style.display = 'none'; document.getElementById('2408.15436v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 27 August, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> August 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2408.14753">arXiv:2408.14753</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2408.14753">pdf</a>, <a href="https://arxiv.org/format/2408.14753">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Sound">cs.SD</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Distributed, Parallel, and Cluster Computing">cs.DC</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Audio and Speech Processing">eess.AS</span> </div> </div> <p class="title is-5 mathjax"> CoopASD: Cooperative Machine Anomalous Sound Detection with Privacy Concerns </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&amp;query=Jiang%2C+A">Anbai Jiang</a>, <a href="/search/eess?searchtype=author&amp;query=Shi%2C+Y">Yuchen Shi</a>, <a href="/search/eess?searchtype=author&amp;query=Fan%2C+P">Pingyi Fan</a>, <a href="/search/eess?searchtype=author&amp;query=Zhang%2C+W">Wei-Qiang Zhang</a>, <a 
href="/search/eess?searchtype=author&amp;query=Liu%2C+J">Jia Liu</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2408.14753v1-abstract-short" style="display: inline;"> Machine anomalous sound detection (ASD) has emerged as one of the most promising applications in the Industrial Internet of Things (IIoT) due to its unprecedented efficacy in mitigating risks of malfunctions and promoting production efficiency. Previous works mainly investigated the machine ASD task under centralized settings. However, developing the ASD system under decentralized settings is cruc&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2408.14753v1-abstract-full').style.display = 'inline'; document.getElementById('2408.14753v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2408.14753v1-abstract-full" style="display: none;"> Machine anomalous sound detection (ASD) has emerged as one of the most promising applications in the Industrial Internet of Things (IIoT) due to its unprecedented efficacy in mitigating risks of malfunctions and promoting production efficiency. Previous works mainly investigated the machine ASD task under centralized settings. However, developing the ASD system under decentralized settings is crucial in practice, since the machine data are dispersed in various factories and the data should not be explicitly shared due to privacy concerns. To enable these factories to cooperatively develop a scalable ASD model while preserving their privacy, we propose a novel framework named CoopASD, where each factory trains an ASD model on its local dataset, and a central server aggregates these local models periodically. 
We employ a pre-trained model as the backbone of the ASD model to improve its robustness and develop specialized techniques to stabilize the model under a completely non-iid and domain shift setting. Compared with previous state-of-the-art (SOTA) models trained in centralized settings, CoopASD showcases competitive results with negligible degradation of 0.08%. We also conduct extensive ablation studies to demonstrate the effectiveness of CoopASD. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2408.14753v1-abstract-full').style.display = 'none'; document.getElementById('2408.14753v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 26 August, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> August 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Accepted by GLOBECOM 2024</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2408.14116">arXiv:2408.14116</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2408.14116">pdf</a>, <a href="https://arxiv.org/format/2408.14116">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Distributed, Parallel, and Cluster Computing">cs.DC</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Networking and Internet Architecture">cs.NI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Signal Processing">eess.SP</span> </div> </div> <p class="title is-5 mathjax"> Hierarchical Learning and Computing over 
Space-Ground Integrated Networks </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&amp;query=Zhu%2C+J">Jingyang Zhu</a>, <a href="/search/eess?searchtype=author&amp;query=Shi%2C+Y">Yuanming Shi</a>, <a href="/search/eess?searchtype=author&amp;query=Zhou%2C+Y">Yong Zhou</a>, <a href="/search/eess?searchtype=author&amp;query=Jiang%2C+C">Chunxiao Jiang</a>, <a href="/search/eess?searchtype=author&amp;query=Kuang%2C+L">Linling Kuang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2408.14116v1-abstract-short" style="display: inline;"> Space-ground integrated networks hold great promise for providing global connectivity, particularly in remote areas where large amounts of valuable data are generated by Internet of Things (IoT) devices, but lacking terrestrial communication infrastructure. The massive data is conventionally transferred to the cloud server for centralized artificial intelligence (AI) models training, raising huge&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2408.14116v1-abstract-full').style.display = 'inline'; document.getElementById('2408.14116v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2408.14116v1-abstract-full" style="display: none;"> Space-ground integrated networks hold great promise for providing global connectivity, particularly in remote areas where large amounts of valuable data are generated by Internet of Things (IoT) devices, but lacking terrestrial communication infrastructure. The massive data is conventionally transferred to the cloud server for centralized artificial intelligence (AI) models training, raising huge communication overhead and privacy concerns. 
To address this, we propose a hierarchical learning and computing framework, which leverages the low-latency characteristic of low-earth-orbit (LEO) satellites and the global coverage of geostationary-earth-orbit (GEO) satellites, to provide global aggregation services for locally trained models on ground IoT devices. Due to the time-varying nature of satellite network topology and the energy constraints of LEO satellites, efficiently aggregating the received local models from ground devices on LEO satellites is highly challenging. By leveraging the predictability of inter-satellite connectivity, modeling the space network as a directed graph, we formulate a network energy minimization problem for model aggregation, which turns out to be a Directed Steiner Tree (DST) problem. We propose a topology-aware energy-efficient routing (TAEER) algorithm to solve the DST problem by finding a minimum spanning arborescence on a substitute directed graph. Extensive simulations under real-world space-ground integrated network settings demonstrate that the proposed TAEER algorithm significantly reduces energy consumption and outperforms benchmarks. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2408.14116v1-abstract-full').style.display = 'none'; document.getElementById('2408.14116v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 26 August, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> August 2024. 
</p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">14 pages, 10 figures</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2408.08228">arXiv:2408.08228</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2408.08228">pdf</a>, <a href="https://arxiv.org/format/2408.08228">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Image and Video Processing">eess.IV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> Rethinking Medical Anomaly Detection in Brain MRI: An Image Quality Assessment Perspective </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&amp;query=Pan%2C+Z">Zixuan Pan</a>, <a href="/search/eess?searchtype=author&amp;query=Xia%2C+J">Jun Xia</a>, <a href="/search/eess?searchtype=author&amp;query=Yan%2C+Z">Zheyu Yan</a>, <a href="/search/eess?searchtype=author&amp;query=Xu%2C+G">Guoyue Xu</a>, <a href="/search/eess?searchtype=author&amp;query=Wu%2C+Y">Yawen Wu</a>, <a href="/search/eess?searchtype=author&amp;query=Jia%2C+Z">Zhenge Jia</a>, <a href="/search/eess?searchtype=author&amp;query=Chen%2C+J">Jianxu Chen</a>, <a href="/search/eess?searchtype=author&amp;query=Shi%2C+Y">Yiyu Shi</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2408.08228v1-abstract-short" style="display: inline;"> Reconstruction-based methods, particularly those leveraging autoencoders, have been widely adopted to perform anomaly detection in brain MRI. 
While most existing works try to improve detection accuracy by proposing new model structures or algorithms, we tackle the problem through image quality assessment, an underexplored perspective in the field. We propose a fusion quality loss function that com&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2408.08228v1-abstract-full').style.display = 'inline'; document.getElementById('2408.08228v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2408.08228v1-abstract-full" style="display: none;"> Reconstruction-based methods, particularly those leveraging autoencoders, have been widely adopted to perform anomaly detection in brain MRI. While most existing works try to improve detection accuracy by proposing new model structures or algorithms, we tackle the problem through image quality assessment, an underexplored perspective in the field. We propose a fusion quality loss function that combines Structural Similarity Index Measure loss with l1 loss, offering a more comprehensive evaluation of reconstruction quality. Additionally, we introduce a data pre-processing strategy that enhances the average intensity ratio (AIR) between normal and abnormal regions, further improving the distinction of anomalies. By fusing the aforementioned two methods, we devise the image quality assessment (IQA) approach. The proposed IQA approach achieves significant improvements (&gt;10%) in terms of Dice coefficient (DICE) and Area Under the Precision-Recall Curve (AUPRC) on the BraTS21 (T2, FLAIR) and MSULB datasets when compared with state-of-the-art methods. These results highlight the importance of invoking the comprehensive image quality assessment in medical anomaly detection and provide a new perspective for future research in this field. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2408.08228v1-abstract-full').style.display = 'none'; document.getElementById('2408.08228v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 15 August, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> August 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2408.08074">arXiv:2408.08074</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2408.08074">pdf</a>, <a href="https://arxiv.org/format/2408.08074">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Information Theory">cs.IT</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Signal Processing">eess.SP</span> </div> </div> <p class="title is-5 mathjax"> A Survey on Integrated Sensing, Communication, and Computation </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&amp;query=Wen%2C+D">Dingzhu Wen</a>, <a href="/search/eess?searchtype=author&amp;query=Zhou%2C+Y">Yong Zhou</a>, <a href="/search/eess?searchtype=author&amp;query=Li%2C+X">Xiaoyang Li</a>, <a href="/search/eess?searchtype=author&amp;query=Shi%2C+Y">Yuanming Shi</a>, <a href="/search/eess?searchtype=author&amp;query=Huang%2C+K">Kaibin Huang</a>, <a href="/search/eess?searchtype=author&amp;query=Letaief%2C+K+B">Khaled B. 
Letaief</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2408.08074v2-abstract-short" style="display: inline;"> The forthcoming generation of wireless technology, 6G, aims to usher in an era of ubiquitous intelligent services, where everything is interconnected and intelligent. This vision requires the seamless integration of three fundamental modules: Sensing for information acquisition, communication for information sharing, and computation for information processing and decision-making. These modules are&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2408.08074v2-abstract-full').style.display = 'inline'; document.getElementById('2408.08074v2-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2408.08074v2-abstract-full" style="display: none;"> The forthcoming generation of wireless technology, 6G, aims to usher in an era of ubiquitous intelligent services, where everything is interconnected and intelligent. This vision requires the seamless integration of three fundamental modules: Sensing for information acquisition, communication for information sharing, and computation for information processing and decision-making. These modules are intricately linked, especially in complex tasks such as edge learning and inference. However, the performance of these modules is interdependent, creating a resource competition for time, energy, and bandwidth. Existing techniques like integrated communication and computation (ICC), integrated sensing and computation (ISC), and integrated sensing and communication (ISAC) have made partial strides in addressing this challenge, but they fall short of meeting the extreme performance requirements. 
To overcome these limitations, it is essential to develop new techniques that comprehensively integrate sensing, communication, and computation. This integrated approach, known as Integrated Sensing, Communication, and Computation (ISCC), offers a systematic perspective for enhancing task performance. This paper begins with a comprehensive survey of historic and related techniques such as ICC, ISC, and ISAC, highlighting their strengths and limitations. It then discusses the benefits, functions, and challenges of ISCC. Subsequently, the state-of-the-art signal designs for ISCC, along with network resource management strategies specifically tailored for ISCC are explored. Furthermore, this paper discusses the exciting research opportunities that lie ahead for implementing ISCC in future advanced networks, and the unresolved issues requiring further investigation. ISCC is expected to unlock the full potential of intelligent connectivity, paving the way for groundbreaking applications and services. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2408.08074v2-abstract-full').style.display = 'none'; document.getElementById('2408.08074v2-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 12 November, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 15 August, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> August 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">In this version, a series of discussions have been added. The benefits, functions, and challenges of ISCC are investigated using a new section. 
Moreover, the unresolved issues of ISCC have been discussed</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2408.02085">arXiv:2408.02085</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2408.02085">pdf</a>, <a href="https://arxiv.org/format/2408.02085">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Signal Processing">eess.SP</span> </div> </div> <p class="title is-5 mathjax"> Unleashing the Power of Data Tsunami: A Comprehensive Survey on Data Assessment and Selection for Instruction Tuning of Language Models </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&amp;query=Qin%2C+Y">Yulei Qin</a>, <a href="/search/eess?searchtype=author&amp;query=Yang%2C+Y">Yuncheng Yang</a>, <a href="/search/eess?searchtype=author&amp;query=Guo%2C+P">Pengcheng Guo</a>, <a href="/search/eess?searchtype=author&amp;query=Li%2C+G">Gang Li</a>, <a href="/search/eess?searchtype=author&amp;query=Shao%2C+H">Hang Shao</a>, <a href="/search/eess?searchtype=author&amp;query=Shi%2C+Y">Yuchen Shi</a>, <a href="/search/eess?searchtype=author&amp;query=Xu%2C+Z">Zihan Xu</a>, <a href="/search/eess?searchtype=author&amp;query=Gu%2C+Y">Yun Gu</a>, <a href="/search/eess?searchtype=author&amp;query=Li%2C+K">Ke Li</a>, <a href="/search/eess?searchtype=author&amp;query=Sun%2C+X">Xing Sun</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short 
has-text-grey-dark mathjax" id="2408.02085v3-abstract-short" style="display: inline;"> Instruction tuning plays a critical role in aligning large language models (LLMs) with human preference. Despite the vast amount of open instruction datasets, naively training a LLM on all existing instructions may not be optimal and practical. To pinpoint the most beneficial datapoints, data assessment and selection methods have been proposed in the fields of natural language processing (NLP) and&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2408.02085v3-abstract-full').style.display = 'inline'; document.getElementById('2408.02085v3-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2408.02085v3-abstract-full" style="display: none;"> Instruction tuning plays a critical role in aligning large language models (LLMs) with human preference. Despite the vast amount of open instruction datasets, naively training a LLM on all existing instructions may not be optimal and practical. To pinpoint the most beneficial datapoints, data assessment and selection methods have been proposed in the fields of natural language processing (NLP) and deep learning. However, under the context of instruction tuning, there still exists a gap in knowledge on what kind of data evaluation metrics can be employed and how they can be integrated into the selection mechanism. To bridge this gap, we present a comprehensive review on existing literature of data assessment and selection especially for instruction tuning of LLMs. We systematically categorize all applicable methods into quality-based, diversity-based, and importance-based ones where a unified, fine-grained taxonomy is structured. For each category, representative methods are elaborated to describe the landscape of relevant research. 
In addition, comparison between latest methods is conducted on their officially reported results to provide in-depth discussions on their limitations. Finally, we summarize the open challenges and propose the promising avenues for future studies. All related contents are available at https://github.com/yuleiqin/fantastic-data-engineering. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2408.02085v3-abstract-full').style.display = 'none'; document.getElementById('2408.02085v3-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 7 August, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 4 August, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> August 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">review, survey, 28 pages, 2 figures, 4 tables</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2407.14823">arXiv:2407.14823</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2407.14823">pdf</a>, <a href="https://arxiv.org/format/2407.14823">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Multimedia">cs.MM</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Image and Video Processing">eess.IV</span> </div> 
</div> <p class="title is-5 mathjax"> CrossDehaze: Scaling Up Image Dehazing with Cross-Data Vision Alignment and Augmentation </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&amp;query=Shi%2C+Y">Yukai Shi</a>, <a href="/search/eess?searchtype=author&amp;query=Weng%2C+Z">Zhipeng Weng</a>, <a href="/search/eess?searchtype=author&amp;query=Lin%2C+Y">Yupei Lin</a>, <a href="/search/eess?searchtype=author&amp;query=Shi%2C+C">Cidan Shi</a>, <a href="/search/eess?searchtype=author&amp;query=Yang%2C+X">Xiaojun Yang</a>, <a href="/search/eess?searchtype=author&amp;query=Lin%2C+L">Liang Lin</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2407.14823v1-abstract-short" style="display: inline;"> In recent years, as computer vision tasks have increasingly relied on high-quality image inputs, the task of image dehazing has received significant attention. Previously, many methods based on priors and deep learning have been proposed to address the task of image dehazing. Ignoring the domain gap between different data, former de-hazing methods usually adopt multiple datasets for explicit train&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2407.14823v1-abstract-full').style.display = 'inline'; document.getElementById('2407.14823v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2407.14823v1-abstract-full" style="display: none;"> In recent years, as computer vision tasks have increasingly relied on high-quality image inputs, the task of image dehazing has received significant attention. Previously, many methods based on priors and deep learning have been proposed to address the task of image dehazing. 
Ignoring the domain gap between different data, former de-hazing methods usually adopt multiple datasets for explicit training, which often makes the methods themselves be violated. To address this problem, we propose a novel method of internal and external data augmentation to improve the existing dehazing methodology. By using a cross-data external augmentor, the dataset inherits samples from different domains that are firmly aligned, making the model learn more robust and generalizable features. By using the internal data augmentation method, the model can fully exploit local information within the images, thereby obtaining more image details. To demonstrate the effectiveness of our proposed method, we conduct training on both the Natural Image Dataset (NID) and the Remote Sensing Image Dataset (RSID). Experimental results show that our method clearly resolves the domain gap in different dehazing datasets and presents a new pipeline for joint training in the dehazing task. Our approach significantly outperforms other advanced methods in dehazing and produces dehazed images that are closest to real haze-free images. The code will be available at: https://github.com/wengzp1/ScaleUpDehazing <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2407.14823v1-abstract-full').style.display = 'none'; document.getElementById('2407.14823v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 20 July, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> July 2024. 
</p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">A cross-dataset vision alignment and augmentation technology is proposed to boost generalizable feature learning in the de-hazing task</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2407.13210">arXiv:2407.13210</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2407.13210">pdf</a>, <a href="https://arxiv.org/format/2407.13210">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Image and Video Processing">eess.IV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> Improved Esophageal Varices Assessment from Non-Contrast CT Scans </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&amp;query=Li%2C+C">Chunli Li</a>, <a href="/search/eess?searchtype=author&amp;query=Zhang%2C+X">Xiaoming Zhang</a>, <a href="/search/eess?searchtype=author&amp;query=Gao%2C+Y">Yuan Gao</a>, <a href="/search/eess?searchtype=author&amp;query=Yin%2C+X">Xiaoli Yin</a>, <a href="/search/eess?searchtype=author&amp;query=Lu%2C+L">Le Lu</a>, <a href="/search/eess?searchtype=author&amp;query=Zhang%2C+L">Ling Zhang</a>, <a href="/search/eess?searchtype=author&amp;query=Yan%2C+K">Ke Yan</a>, <a href="/search/eess?searchtype=author&amp;query=Shi%2C+Y">Yu Shi</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2407.13210v1-abstract-short" style="display: inline;"> Esophageal varices (EV), a serious health concern resulting from portal hypertension, are traditionally diagnosed through 
invasive endoscopic procedures. Despite non-contrast computed tomography (NC-CT) imaging being a less expensive and non-invasive imaging modality, it has yet to gain full acceptance as a primary clinical diagnostic tool for EV evaluation. To overcome existing diagnostic challen&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2407.13210v1-abstract-full').style.display = 'inline'; document.getElementById('2407.13210v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2407.13210v1-abstract-full" style="display: none;"> Esophageal varices (EV), a serious health concern resulting from portal hypertension, are traditionally diagnosed through invasive endoscopic procedures. Despite non-contrast computed tomography (NC-CT) imaging being a less expensive and non-invasive imaging modality, it has yet to gain full acceptance as a primary clinical diagnostic tool for EV evaluation. To overcome existing diagnostic challenges, we present the Multi-Organ-cOhesion-Network (MOON), a novel framework enhancing the analysis of critical organ features in NC-CT scans for effective assessment of EV. Drawing inspiration from the thorough assessment practices of radiologists, MOON establishes a cohesive multiorgan analysis model that unifies the imaging features of the related organs of EV, namely esophagus, liver, and spleen. This integration significantly increases the diagnostic accuracy for EV. We have compiled an extensive NC-CT dataset of 1,255 patients diagnosed with EV, spanning three grades of severity. Each case is corroborated by endoscopic diagnostic results. 
The efficacy of MOON has been substantiated through a validation process involving multi-fold cross-validation on 1,010 cases and an independent test on 245 cases, exhibiting superior diagnostic performance compared to methods focusing solely on the esophagus (for classifying severe grade: AUC of 0.864 versus 0.803, and for moderate to severe grades: AUC of 0.832 versus 0.793). To our knowledge, MOON is the first work to incorporate a synchronized multi-organ NC-CT analysis for EV assessment, providing a more acceptable and minimally invasive alternative for patients compared to traditional endoscopy. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2407.13210v1-abstract-full').style.display = 'none'; document.getElementById('2407.13210v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 18 July, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> July 2024. 
</p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Early accepted to MICCAI 2024</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2407.03966">arXiv:2407.03966</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2407.03966">pdf</a>, <a href="https://arxiv.org/format/2407.03966">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Sound">cs.SD</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Audio and Speech Processing">eess.AS</span> </div> </div> <p class="title is-5 mathjax"> Serialized Output Training by Learned Dominance </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&amp;query=Shi%2C+Y">Ying Shi</a>, <a href="/search/eess?searchtype=author&amp;query=Li%2C+L">Lantian Li</a>, <a href="/search/eess?searchtype=author&amp;query=Yin%2C+S">Shi Yin</a>, <a href="/search/eess?searchtype=author&amp;query=Wang%2C+D">Dong Wang</a>, <a href="/search/eess?searchtype=author&amp;query=Han%2C+J">Jiqing Han</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2407.03966v1-abstract-short" style="display: inline;"> Serialized Output Training (SOT) has showcased state-of-the-art performance in multi-talker speech recognition by sequentially decoding the speech of individual speakers. To address the challenging label-permutation issue, prior methods have relied on either the Permutation Invariant Training (PIT) or the time-based First-In-First-Out (FIFO) rule. 
This study presents a model-based serialization st&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2407.03966v1-abstract-full').style.display = 'inline'; document.getElementById('2407.03966v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2407.03966v1-abstract-full" style="display: none;"> Serialized Output Training (SOT) has showcased state-of-the-art performance in multi-talker speech recognition by sequentially decoding the speech of individual speakers. To address the challenging label-permutation issue, prior methods have relied on either the Permutation Invariant Training (PIT) or the time-based First-In-First-Out (FIFO) rule. This study presents a model-based serialization strategy that incorporates an auxiliary module into the Attention Encoder-Decoder architecture, autonomously identifying the crucial factors to order the output sequence of the speech components in multi-talker speech. Experiments conducted on the LibriSpeech and LibriMix databases reveal that our approach significantly outperforms the PIT and FIFO baselines in both 2-mix and 3-mix scenarios. Further analysis shows that the serialization module identifies dominant speech components in a mixture by factors including loudness and gender, and orders speech components based on the dominance score. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2407.03966v1-abstract-full').style.display = 'none'; document.getElementById('2407.03966v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 4 July, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> July 2024. 
</p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">accepted by INTERSPEECH 2024</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2407.03772">arXiv:2407.03772</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2407.03772">pdf</a>, <a href="https://arxiv.org/format/2407.03772">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Image and Video Processing">eess.IV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Quantitative Methods">q-bio.QM</span> </div> </div> <p class="title is-5 mathjax"> CS3: Cascade SAM for Sperm Segmentation </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&amp;query=Shi%2C+Y">Yi Shi</a>, <a href="/search/eess?searchtype=author&amp;query=Tian%2C+X">Xu-Peng Tian</a>, <a href="/search/eess?searchtype=author&amp;query=Wang%2C+Y">Yun-Kai Wang</a>, <a href="/search/eess?searchtype=author&amp;query=Zhang%2C+T">Tie-Yi Zhang</a>, <a href="/search/eess?searchtype=author&amp;query=Yao%2C+B">Bin Yao</a>, <a href="/search/eess?searchtype=author&amp;query=Wang%2C+H">Hui Wang</a>, <a href="/search/eess?searchtype=author&amp;query=Shao%2C+Y">Yong Shao</a>, <a href="/search/eess?searchtype=author&amp;query=Wang%2C+C">Cen-Cen Wang</a>, <a href="/search/eess?searchtype=author&amp;query=Zeng%2C+R">Rong Zeng</a>, <a href="/search/eess?searchtype=author&amp;query=Zhan%2C+D">De-Chuan Zhan</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2407.03772v2-abstract-short" 
style="display: inline;"> Automated sperm morphology analysis plays a crucial role in the assessment of male fertility, yet its efficacy is often compromised by the challenges in accurately segmenting sperm images. Existing segmentation techniques, including the Segment Anything Model(SAM), are notably inadequate in addressing the complex issue of sperm overlap-a frequent occurrence in clinical samples. Our exploratory stu&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2407.03772v2-abstract-full').style.display = 'inline'; document.getElementById('2407.03772v2-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2407.03772v2-abstract-full" style="display: none;"> Automated sperm morphology analysis plays a crucial role in the assessment of male fertility, yet its efficacy is often compromised by the challenges in accurately segmenting sperm images. Existing segmentation techniques, including the Segment Anything Model(SAM), are notably inadequate in addressing the complex issue of sperm overlap-a frequent occurrence in clinical samples. Our exploratory studies reveal that modifying image characteristics by removing sperm heads and easily segmentable areas, alongside enhancing the visibility of overlapping regions, markedly enhances SAM&#39;s efficiency in segmenting intricate sperm structures. Motivated by these findings, we present the Cascade SAM for Sperm Segmentation (CS3), an unsupervised approach specifically designed to tackle the issue of sperm overlap. This method employs a cascade application of SAM to segment sperm heads, simple tails, and complex tails in stages. Subsequently, these segmented masks are meticulously matched and joined to construct complete sperm masks. 
In collaboration with leading medical institutions, we have compiled a dataset comprising approximately 2,000 unlabeled sperm images to fine-tune our method, and secured expert annotations for an additional 240 images to facilitate comprehensive model assessment. Experimental results demonstrate superior performance of CS3 compared to existing methods. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2407.03772v2-abstract-full').style.display = 'none'; document.getElementById('2407.03772v2-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 9 July, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 4 July, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> July 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Early accepted by MICCAI2024</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2407.03648">arXiv:2407.03648</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2407.03648">pdf</a>, <a href="https://arxiv.org/format/2407.03648">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Audio and Speech Processing">eess.AS</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Sound">cs.SD</span> </div> </div> <p class="title is-5 mathjax"> High Fidelity Text-Guided Music Editing via Single-Stage Flow Matching </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&amp;query=Lan%2C+G+L">Gael Le Lan</a>, <a href="/search/eess?searchtype=author&amp;query=Shi%2C+B">Bowen Shi</a>, <a 
href="/search/eess?searchtype=author&amp;query=Ni%2C+Z">Zhaoheng Ni</a>, <a href="/search/eess?searchtype=author&amp;query=Srinivasan%2C+S">Sidd Srinivasan</a>, <a href="/search/eess?searchtype=author&amp;query=Kumar%2C+A">Anurag Kumar</a>, <a href="/search/eess?searchtype=author&amp;query=Ellis%2C+B">Brian Ellis</a>, <a href="/search/eess?searchtype=author&amp;query=Kant%2C+D">David Kant</a>, <a href="/search/eess?searchtype=author&amp;query=Nagaraja%2C+V">Varun Nagaraja</a>, <a href="/search/eess?searchtype=author&amp;query=Chang%2C+E">Ernie Chang</a>, <a href="/search/eess?searchtype=author&amp;query=Hsu%2C+W">Wei-Ning Hsu</a>, <a href="/search/eess?searchtype=author&amp;query=Shi%2C+Y">Yangyang Shi</a>, <a href="/search/eess?searchtype=author&amp;query=Chandra%2C+V">Vikas Chandra</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2407.03648v2-abstract-short" style="display: inline;"> We introduce MelodyFlow, an efficient text-controllable high-fidelity music generation and editing model. It operates on continuous latent representations from a low frame rate 48 kHz stereo variational auto encoder codec. Based on a diffusion transformer architecture trained on a flow-matching objective the model can edit diverse high quality stereo samples of variable duration, with simple text&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2407.03648v2-abstract-full').style.display = 'inline'; document.getElementById('2407.03648v2-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2407.03648v2-abstract-full" style="display: none;"> We introduce MelodyFlow, an efficient text-controllable high-fidelity music generation and editing model. 
It operates on continuous latent representations from a low frame rate 48 kHz stereo variational auto encoder codec. Based on a diffusion transformer architecture trained on a flow-matching objective the model can edit diverse high quality stereo samples of variable duration, with simple text descriptions. We adapt the ReNoise latent inversion method to flow matching and compare it with the original implementation and naive denoising diffusion implicit model (DDIM) inversion on a variety of music editing prompts. Our results indicate that our latent inversion outperforms both ReNoise and DDIM for zero-shot test-time text-guided editing on several objective metrics. Subjective evaluations exhibit a substantial improvement over previous state of the art for music editing. Code and model weights will be publicly made available. Samples are available at https://melodyflow.github.io. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2407.03648v2-abstract-full').style.display = 'none'; document.getElementById('2407.03648v2-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 16 October, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 4 July, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> July 2024. 
</p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2407.02924">arXiv:2407.02924</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2407.02924">pdf</a>, <a href="https://arxiv.org/format/2407.02924">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Systems and Control">eess.SY</span> </div> </div> <p class="title is-5 mathjax"> Federated Fine-Tuning for Pre-Trained Foundation Models Over Wireless Networks </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&amp;query=Wang%2C+Z">Zixin Wang</a>, <a href="/search/eess?searchtype=author&amp;query=Zhou%2C+Y">Yong Zhou</a>, <a href="/search/eess?searchtype=author&amp;query=Shi%2C+Y">Yuanming Shi</a>, <a href="/search/eess?searchtype=author&amp;query=Letaief%2C+K+B">Khaled. B. Letaief</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2407.02924v1-abstract-short" style="display: inline;"> Pre-trained foundation models (FMs), with extensive number of neurons, are key to advancing next-generation intelligence services, where personalizing these models requires massive amount of task-specific data and computational resources. The prevalent solution involves centralized processing at the edge server, which, however, raises privacy concerns due to the transmission of raw data. 
Instead,&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2407.02924v1-abstract-full').style.display = 'inline'; document.getElementById('2407.02924v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2407.02924v1-abstract-full" style="display: none;"> Pre-trained foundation models (FMs), with extensive number of neurons, are key to advancing next-generation intelligence services, where personalizing these models requires massive amount of task-specific data and computational resources. The prevalent solution involves centralized processing at the edge server, which, however, raises privacy concerns due to the transmission of raw data. Instead, federated fine-tuning (FedFT) is an emerging privacy-preserving fine-tuning (FT) paradigm for personalized pre-trained foundation models. In particular, by integrating low-rank adaptation (LoRA) with federated learning (FL), federated LoRA enables the collaborative FT of a global model with edge devices, achieving comparable learning performance to full FT while training fewer parameters over distributed data and preserving raw data privacy. However, the limited radio resources and computation capabilities of edge devices pose significant challenges for deploying federated LoRA over wireless networks. In this paper, we propose a split federated LoRA framework, which deploys the computationally-intensive encoder of a pre-trained model at the edge server, while keeping the embedding and task modules at the edge devices. Building on this split framework, the paper provides a rigorous analysis of the upper bound of the convergence gap for the wireless federated LoRA system. This analysis motivates the formulation of a long-term upper bound minimization problem, where we decompose the formulated long-term mixed-integer programming (MIP) problem into sequential sub-problems using the Lyapunov technique. 
We then develop an online algorithm for effective device scheduling and bandwidth allocation. Simulation results demonstrate the effectiveness of the proposed online algorithm in enhancing learning performance. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2407.02924v1-abstract-full').style.display = 'none'; document.getElementById('2407.02924v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 3 July, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> July 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2407.02681">arXiv:2407.02681</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2407.02681">pdf</a>, <a href="https://arxiv.org/format/2407.02681">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Image and Video Processing">eess.IV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Optimization and Control">math.OC</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">stat.ML</span> </div> </div> <p class="title is-5 mathjax"> Uniform Transformation: Refining Latent Representation in Variational Autoencoders </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&amp;query=Shi%2C+Y">Ye Shi</a>, <a href="/search/eess?searchtype=author&amp;query=Lee%2C+C+S+G">C. S. 
George Lee</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2407.02681v1-abstract-short" style="display: inline;"> Irregular distribution in latent space causes posterior collapse, misalignment between posterior and prior, and ill-sampling problem in Variational Autoencoders (VAEs). In this paper, we introduce a novel adaptable three-stage Uniform Transformation (UT) module -- Gaussian Kernel Density Estimation (G-KDE) clustering, non-parametric Gaussian Mixture (GM) Modeling, and Probability Integral Transfor&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2407.02681v1-abstract-full').style.display = 'inline'; document.getElementById('2407.02681v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2407.02681v1-abstract-full" style="display: none;"> Irregular distribution in latent space causes posterior collapse, misalignment between posterior and prior, and ill-sampling problem in Variational Autoencoders (VAEs). In this paper, we introduce a novel adaptable three-stage Uniform Transformation (UT) module -- Gaussian Kernel Density Estimation (G-KDE) clustering, non-parametric Gaussian Mixture (GM) Modeling, and Probability Integral Transform (PIT) -- to address irregular latent distributions. By reconfiguring irregular distributions into a uniform distribution in the latent space, our approach significantly enhances the disentanglement and interpretability of latent representations, overcoming the limitation of traditional VAE models in capturing complex data structures. Empirical evaluations demonstrated the efficacy of our proposed UT module in improving disentanglement metrics across benchmark datasets -- dSprites and MNIST. 
Our findings suggest a promising direction for advancing representation learning techniques, with implications for future research in extending this framework to more sophisticated datasets and downstream tasks. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2407.02681v1-abstract-full').style.display = 'none'; document.getElementById('2407.02681v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 2 July, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> July 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Accepted by 2024 IEEE 20th International Conference on Automation Science and Engineering</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2407.01745">arXiv:2407.01745</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2407.01745">pdf</a>, <a href="https://arxiv.org/format/2407.01745">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Systems and Control">eess.SY</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Analysis of PDEs">math.AP</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Dynamical Systems">math.DS</span> </div> </div> <p class="title is-5 mathjax"> Adaptive control of reaction-diffusion PDEs via neural operator-approximated gain kernels </p> <p class="authors"> <span class="search-hit">Authors:</span> <a 
href="/search/eess?searchtype=author&amp;query=Bhan%2C+L">Luke Bhan</a>, <a href="/search/eess?searchtype=author&amp;query=Shi%2C+Y">Yuanyuan Shi</a>, <a href="/search/eess?searchtype=author&amp;query=Krstic%2C+M">Miroslav Krstic</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2407.01745v1-abstract-short" style="display: inline;"> Neural operator approximations of the gain kernels in PDE backstepping have emerged as a viable method for implementing controllers in real time. With such an approach, one approximates the gain kernel, which maps the plant coefficient into the solution of a PDE, with a neural operator. It is in adaptive control that the benefit of the neural operator is realized, as the kernel PDE solution needs t&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2407.01745v1-abstract-full').style.display = 'inline'; document.getElementById('2407.01745v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2407.01745v1-abstract-full" style="display: none;"> Neural operator approximations of the gain kernels in PDE backstepping have emerged as a viable method for implementing controllers in real time. With such an approach, one approximates the gain kernel, which maps the plant coefficient into the solution of a PDE, with a neural operator. It is in adaptive control that the benefit of the neural operator is realized, as the kernel PDE solution needs to be computed online, for every updated estimate of the plant coefficient. We extend the neural operator methodology from adaptive control of a hyperbolic PDE to adaptive control of a benchmark parabolic PDE (a reaction-diffusion equation with a spatially-varying and unknown reaction coefficient). 
We prove global stability and asymptotic regulation of the plant state for a Lyapunov design of parameter adaptation. The key technical challenge of the result is handling the 2D nature of the gain kernels and proving that the target system with two distinct sources of perturbation terms, due to the parameter estimation error and due to the neural approximation error, is Lyapunov stable. To verify our theoretical result, we present simulations achieving calculation speedups up to 45x relative to the traditional finite difference solvers for every timestep in the simulation trajectory. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2407.01745v1-abstract-full').style.display = 'none'; document.getElementById('2407.01745v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 1 July, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> July 2024. 
</p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">13 pages, 4 figures</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2407.00955">arXiv:2407.00955</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2407.00955">pdf</a>, <a href="https://arxiv.org/format/2407.00955">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Information Theory">cs.IT</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Signal Processing">eess.SP</span> </div> </div> <p class="title is-5 mathjax"> Task-oriented Over-the-air Computation for Edge-device Co-inference with Balanced Classification Accuracy </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&amp;query=Jiao%2C+X">Xiang Jiao</a>, <a href="/search/eess?searchtype=author&amp;query=Wen%2C+D">Dingzhu Wen</a>, <a href="/search/eess?searchtype=author&amp;query=Zhu%2C+G">Guangxu Zhu</a>, <a href="/search/eess?searchtype=author&amp;query=Jiang%2C+W">Wei Jiang</a>, <a href="/search/eess?searchtype=author&amp;query=Luo%2C+W">Wu Luo</a>, <a href="/search/eess?searchtype=author&amp;query=Shi%2C+Y">Yuanming Shi</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2407.00955v1-abstract-short" style="display: inline;"> Edge-device co-inference, which concerns the cooperation between edge devices and an edge server for completing inference tasks over wireless networks, has been a promising technique for enabling various kinds of intelligent services at the network edge, 
e.g., auto-driving. In this paradigm, the concerned design objective of the network shifts from the traditional communication throughput to the e&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2407.00955v1-abstract-full').style.display = 'inline'; document.getElementById('2407.00955v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2407.00955v1-abstract-full" style="display: none;"> Edge-device co-inference, which concerns the cooperation between edge devices and an edge server for completing inference tasks over wireless networks, has been a promising technique for enabling various kinds of intelligent services at the network edge, e.g., auto-driving. In this paradigm, the concerned design objective of the network shifts from the traditional communication throughput to the effective and efficient execution of the inference task underpinned by the network, measured by, e.g., the inference accuracy and latency. In this paper, a task-oriented over-the-air computation scheme is proposed for a multidevice artificial intelligence system. Particularly, a novel tractable inference accuracy metric is proposed for classification tasks, which is called minimum pair-wise discriminant gain. Unlike prior work measuring the average of all class pairs in feature space, it measures the minimum distance of all class pairs. By maximizing the minimum pair-wise discriminant gain instead of its average counterpart, any pair of classes can be better separated in the feature space, and thus leading to a balanced and improved inference accuracy for all classes. Besides, this paper jointly optimizes the minimum discriminant gain of all feature elements instead of separately maximizing that of each element in the existing designs. 
As a result, the transmit power can be adaptively allocated to the feature elements according to their different contributions to the inference accuracy, opening an extra degree of freedom to improve inference performance. Extensive experiments are conducted using a concrete use case of human motion recognition to verify the superiority of the proposed design over the benchmarking scheme. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2407.00955v1-abstract-full').style.display = 'none'; document.getElementById('2407.00955v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 1 July, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> July 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">This paper was accepted by IEEE Transactions on Vehicular Technology on June 30, 2024</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2406.15675">arXiv:2406.15675</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2406.15675">pdf</a>, <a href="https://arxiv.org/format/2406.15675">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Systems and Control">eess.SY</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Symbolic Computation">cs.SC</span> </div> </div> <p class="title is-5 mathjax"> Combining Neural Networks and Symbolic Regression for Analytical Lyapunov Function Discovery </p> <p class="authors"> <span class="search-hit">Authors:</span> <a 
href="/search/eess?searchtype=author&amp;query=Feng%2C+J">Jie Feng</a>, <a href="/search/eess?searchtype=author&amp;query=Zou%2C+H">Haohan Zou</a>, <a href="/search/eess?searchtype=author&amp;query=Shi%2C+Y">Yuanyuan Shi</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2406.15675v3-abstract-short" style="display: inline;"> We propose CoNSAL (Combining Neural networks and Symbolic regression for Analytical Lyapunov function) to construct analytical Lyapunov functions for nonlinear dynamic systems. This framework contains a neural Lyapunov function and a symbolic regression component, where symbolic regression is applied to distill the neural network to precise analytical forms. Our approach utilizes symbolic regressi&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2406.15675v3-abstract-full').style.display = 'inline'; document.getElementById('2406.15675v3-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2406.15675v3-abstract-full" style="display: none;"> We propose CoNSAL (Combining Neural networks and Symbolic regression for Analytical Lyapunov function) to construct analytical Lyapunov functions for nonlinear dynamic systems. This framework contains a neural Lyapunov function and a symbolic regression component, where symbolic regression is applied to distill the neural network to precise analytical forms. Our approach utilizes symbolic regression not only as a tool for translation but also as a means to uncover counterexamples. This procedure terminates when no counterexamples are found in the analytical formulation. Compared with previous results, CoNSAL directly produces an analytical form of the Lyapunov function with improved interpretability in both the learning process and the final results. 
We apply CoNSAL to 2-D inverted pendulum, path following, Van Der Pol Oscillator, 3-D trig dynamics, 4-D rotating wheel pendulum, 6-D 3-bus power system, and demonstrate that our algorithm successfully finds their valid Lyapunov functions. Code examples are available at https://github.com/HaohanZou/CoNSAL. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2406.15675v3-abstract-full').style.display = 'none'; document.getElementById('2406.15675v3-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 12 July, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 21 June, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> June 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Workshop paper, accepted by Workshop on Foundations of Reinforcement Learning and Control at the 41st International Conference on Machine Learning, Vienna, Austria</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2406.13209">arXiv:2406.13209</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2406.13209">pdf</a>, <a href="https://arxiv.org/format/2406.13209">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Image and Video Processing">eess.IV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Medical Physics">physics.med-ph</span> </div> </div> <p class="title is-5 mathjax"> Diffusion Model-based FOD Restoration from High Distortion in dMRI </p> <p 
class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&amp;query=Huang%2C+S">Shuo Huang</a>, <a href="/search/eess?searchtype=author&amp;query=Zhong%2C+L">Lujia Zhong</a>, <a href="/search/eess?searchtype=author&amp;query=Shi%2C+Y">Yonggang Shi</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2406.13209v1-abstract-short" style="display: inline;"> Fiber orientation distributions (FODs) is a popular model to represent the diffusion MRI (dMRI) data. However, imaging artifacts such as susceptibility-induced distortion in dMRI can cause signal loss and lead to the corrupted reconstruction of FODs, which prohibits successful fiber tracking and connectivity analysis in affected brain regions such as the brain stem. Generative models, such as the&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2406.13209v1-abstract-full').style.display = 'inline'; document.getElementById('2406.13209v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2406.13209v1-abstract-full" style="display: none;"> Fiber orientation distributions (FODs) is a popular model to represent the diffusion MRI (dMRI) data. However, imaging artifacts such as susceptibility-induced distortion in dMRI can cause signal loss and lead to the corrupted reconstruction of FODs, which prohibits successful fiber tracking and connectivity analysis in affected brain regions such as the brain stem. Generative models, such as the diffusion models, have been successfully applied in various image restoration tasks. However, their application on FOD images poses unique challenges since FODs are 4-dimensional data represented by spherical harmonics (SPHARM) with the 4-th dimension exhibiting order-related dependency. 
In this paper, we propose a novel diffusion model for FOD restoration that can recover the signal loss caused by distortion artifacts. We use volume-order encoding to enhance the ability of the diffusion model to generate individual FOD volumes at all SPHARM orders. Moreover, we add cross-attention features extracted across all SPHARM orders in generating every individual FOD volume to capture the order-related dependency across FOD volumes. We also condition the diffusion model with low-distortion FODs surrounding high-distortion areas to maintain the geometric coherence of the generated FODs. We trained and tested our model using data from the UK Biobank (n = 1315). On a test set with ground truth (n = 43), we demonstrate the high accuracy of the generated FODs in terms of root mean square errors of FOD volumes and angular errors of FOD peaks. We also apply our method to a test set with large distortion in the brain stem area (n = 1172) and demonstrate the efficacy of our method in restoring the FOD integrity and, hence, greatly improving tractography performance in affected brain regions. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2406.13209v1-abstract-full').style.display = 'none'; document.getElementById('2406.13209v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 19 June, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> June 2024. 
</p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">11 pages, 7 figures</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2406.10434">arXiv:2406.10434</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2406.10434">pdf</a>, <a href="https://arxiv.org/format/2406.10434">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Systems and Control">eess.SY</span> </div> </div> <p class="title is-5 mathjax"> Risk-Aware Value-Oriented Net Demand Forecasting for Virtual Power Plants </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&amp;query=Zhang%2C+Y">Yufan Zhang</a>, <a href="/search/eess?searchtype=author&amp;query=Han%2C+J">Jiajun Han</a>, <a href="/search/eess?searchtype=author&amp;query=Shi%2C+Y">Yuanyuan Shi</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2406.10434v1-abstract-short" style="display: inline;"> This paper develops a risk-aware net demand forecasting product for virtual power plants, which helps reduce the risk of high operation costs. At the training phase, a bilevel program for parameter estimation is formulated, where the upper level optimizes over the forecast model parameter to minimize the conditional value-at-risk (a risk metric) of operation costs. 
The lower level solves the opera&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2406.10434v1-abstract-full').style.display = 'inline'; document.getElementById('2406.10434v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2406.10434v1-abstract-full" style="display: none;"> This paper develops a risk-aware net demand forecasting product for virtual power plants, which helps reduce the risk of high operation costs. At the training phase, a bilevel program for parameter estimation is formulated, where the upper level optimizes over the forecast model parameter to minimize the conditional value-at-risk (a risk metric) of operation costs. The lower level solves the operation problems given the forecast. Leveraging the specific structure of the operation problem, we show that the bilevel program is equivalent to a convex program when the forecast model is linear. Numerical results show that our approach effectively reduces the risk of high costs compared to the forecasting approach developed for risk-neutral decision makers. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2406.10434v1-abstract-full').style.display = 'none'; document.getElementById('2406.10434v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 14 June, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> June 2024. 
</p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Submitted to The 56th North American Power Symposium (NAPS 2024)</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2406.09569">arXiv:2406.09569</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2406.09569">pdf</a>, <a href="https://arxiv.org/format/2406.09569">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Sound">cs.SD</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Audio and Speech Processing">eess.AS</span> </div> </div> <p class="title is-5 mathjax"> Speech ReaLLM -- Real-time Streaming Speech Recognition with Multimodal LLMs by Teaching the Flow of Time </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&amp;query=Seide%2C+F">Frank Seide</a>, <a href="/search/eess?searchtype=author&amp;query=Doulaty%2C+M">Morrie Doulaty</a>, <a href="/search/eess?searchtype=author&amp;query=Shi%2C+Y">Yangyang Shi</a>, <a href="/search/eess?searchtype=author&amp;query=Gaur%2C+Y">Yashesh Gaur</a>, <a href="/search/eess?searchtype=author&amp;query=Jia%2C+J">Junteng Jia</a>, <a href="/search/eess?searchtype=author&amp;query=Wu%2C+C">Chunyang Wu</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2406.09569v1-abstract-short" style="display: inline;"> We introduce Speech ReaLLM, a new ASR architecture that marries &#34;decoder-only&#34; ASR 
with the RNN-T to make multimodal LLM architectures capable of real-time streaming. This is the first &#34;decoder-only&#34; ASR architecture designed to handle continuous audio without explicit end-pointing. Speech ReaLLM is a special case of the more general ReaLLM (&#34;real-time LLM&#34;) approach, also introduced here for the&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2406.09569v1-abstract-full').style.display = 'inline'; document.getElementById('2406.09569v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2406.09569v1-abstract-full" style="display: none;"> We introduce Speech ReaLLM, a new ASR architecture that marries &#34;decoder-only&#34; ASR with the RNN-T to make multimodal LLM architectures capable of real-time streaming. This is the first &#34;decoder-only&#34; ASR architecture designed to handle continuous audio without explicit end-pointing. Speech ReaLLM is a special case of the more general ReaLLM (&#34;real-time LLM&#34;) approach, also introduced here for the first time. The idea is inspired by RNN-T: Instead of generating a response only at the end of a user prompt, generate after every input token received in real time (it is often empty). On Librispeech &#34;test&#34;, an 80M Speech ReaLLM achieves WERs of 3.0% and 7.4% in real time (without an external LM or auxiliary loss). This is only slightly above a 3x larger Attention-Encoder-Decoder baseline. We also show that this way, an LLM architecture can learn to represent and reproduce the flow of time; and that a pre-trained 7B LLM can be fine-tuned to do reasonably well on this task. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2406.09569v1-abstract-full').style.display = 'none'; document.getElementById('2406.09569v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 13 June, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> June 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2405.20489">arXiv:2405.20489</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2405.20489">pdf</a>, <a href="https://arxiv.org/format/2405.20489">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Systems and Control">eess.SY</span> </div> </div> <p class="title is-5 mathjax"> Stability-Constrained Learning for Frequency Regulation in Power Grids with Variable Inertia </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&amp;query=Feng%2C+J">Jie Feng</a>, <a href="/search/eess?searchtype=author&amp;query=Muralidharan%2C+M">Manasa Muralidharan</a>, <a href="/search/eess?searchtype=author&amp;query=Henriquez-Auba%2C+R">Rodrigo Henriquez-Auba</a>, <a href="/search/eess?searchtype=author&amp;query=Hidalgo-Gonzalez%2C+P">Patricia Hidalgo-Gonzalez</a>, <a href="/search/eess?searchtype=author&amp;query=Shi%2C+Y">Yuanyuan Shi</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2405.20489v2-abstract-short" style="display: inline;"> The increasing penetration of converter-based renewable generation has resulted in faster frequency dynamics, and low and variable inertia. 
As a result, there is a need for frequency control methods that are able to stabilize a disturbance in the power system at timescales comparable to the fast converter dynamics. This paper proposes a combined linear and neural network controller for inverter-ba&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2405.20489v2-abstract-full').style.display = 'inline'; document.getElementById('2405.20489v2-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2405.20489v2-abstract-full" style="display: none;"> The increasing penetration of converter-based renewable generation has resulted in faster frequency dynamics, and low and variable inertia. As a result, there is a need for frequency control methods that are able to stabilize a disturbance in the power system at timescales comparable to the fast converter dynamics. This paper proposes a combined linear and neural network controller for inverter-based primary frequency control that is stable at time-varying levels of inertia. We model the time-variance in inertia via a switched affine hybrid system model. We derive stability certificates for the proposed controller via a quadratic candidate Lyapunov function. We test the proposed control on a 12-bus 3-area test network, and compare its performance with a base case linear controller, optimized linear controller, and finite-horizon Linear Quadratic Regulator (LQR). Our proposed controller achieves faster mean settling time and over 50% reduction in average control cost across $100$ inertia scenarios compared to the optimized linear controller. Unlike LQR which requires complete knowledge of the inertia trajectories and system dynamics over the entire control time horizon, our proposed controller is real-time tractable, and achieves comparable performance to LQR. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2405.20489v2-abstract-full').style.display = 'none'; document.getElementById('2405.20489v2-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 11 June, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 30 May, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> May 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">This paper is to appear in IEEE Control System Letters (L-CSS)</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2405.16258">arXiv:2405.16258</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2405.16258">pdf</a>, <a href="https://arxiv.org/format/2405.16258">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Systems and Control">eess.SY</span> </div> </div> <p class="title is-5 mathjax"> USD: Unsupervised Soft Contrastive Learning for Fault Detection in Multivariate Time Series </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&amp;query=Liu%2C+H">Hong Liu</a>, <a href="/search/eess?searchtype=author&amp;query=Qiu%2C+X">Xiuxiu Qiu</a>, <a href="/search/eess?searchtype=author&amp;query=Shi%2C+Y">Yiming Shi</a>, <a href="/search/eess?searchtype=author&amp;query=Zang%2C+Z">Zelin Zang</a> </p> <p class="abstract mathjax"> <span 
class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2405.16258v1-abstract-short" style="display: inline;"> Unsupervised fault detection in multivariate time series is critical for maintaining the integrity and efficiency of complex systems, with current methodologies largely focusing on statistical and machine learning techniques. However, these approaches often rest on the assumption that data distributions conform to Gaussian models, overlooking the diversity of patterns that can manifest in both nor&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2405.16258v1-abstract-full').style.display = 'inline'; document.getElementById('2405.16258v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2405.16258v1-abstract-full" style="display: none;"> Unsupervised fault detection in multivariate time series is critical for maintaining the integrity and efficiency of complex systems, with current methodologies largely focusing on statistical and machine learning techniques. However, these approaches often rest on the assumption that data distributions conform to Gaussian models, overlooking the diversity of patterns that can manifest in both normal and abnormal states, thereby diminishing discriminative performance. Our innovation addresses this limitation by introducing a combination of data augmentation and soft contrastive learning, specifically designed to capture the multifaceted nature of state behaviors more accurately. The data augmentation process enriches the dataset with varied representations of normal states, while soft contrastive learning fine-tunes the model&#39;s sensitivity to the subtle differences between normal and abnormal patterns, enabling it to recognize a broader spectrum of anomalies. 
This dual strategy significantly boosts the model&#39;s ability to distinguish between normal and abnormal states, leading to a marked improvement in fault detection performance across multiple datasets and settings, thereby setting a new benchmark for unsupervised fault detection in complex systems. The code of our method is available at \url{https://github.com/zangzelin/code_USD.git}. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2405.16258v1-abstract-full').style.display = 'none'; document.getElementById('2405.16258v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 25 May, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> May 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">19 pages, 7 figures, under review</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2405.13199">arXiv:2405.13199</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2405.13199">pdf</a>, <a href="https://arxiv.org/ps/2405.13199">ps</a>, <a href="https://arxiv.org/format/2405.13199">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Image and Video Processing">eess.IV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> TauAD: MRI-free Tau Anomaly Detection in PET Imaging via Conditioned Diffusion Models </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&amp;query=Zhong%2C+L">Lujia Zhong</a>, <a 
href="/search/eess?searchtype=author&amp;query=Huang%2C+S">Shuo Huang</a>, <a href="/search/eess?searchtype=author&amp;query=Yue%2C+J">Jiaxin Yue</a>, <a href="/search/eess?searchtype=author&amp;query=Zhang%2C+J">Jianwei Zhang</a>, <a href="/search/eess?searchtype=author&amp;query=Deng%2C+Z">Zhiwei Deng</a>, <a href="/search/eess?searchtype=author&amp;query=Chi%2C+W">Wenhao Chi</a>, <a href="/search/eess?searchtype=author&amp;query=Shi%2C+Y">Yonggang Shi</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2405.13199v1-abstract-short" style="display: inline;"> The emergence of tau PET imaging over the last decade has enabled Alzheimer&#39;s disease (AD) researchers to examine tau pathology in vivo and more effectively characterize the disease trajectories of AD. Current tau PET analysis methods, however, typically perform inferences on large cortical ROIs and are limited in the detection of localized tau pathology that varies across subjects. Furthermore, a&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2405.13199v1-abstract-full').style.display = 'inline'; document.getElementById('2405.13199v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2405.13199v1-abstract-full" style="display: none;"> The emergence of tau PET imaging over the last decade has enabled Alzheimer&#39;s disease (AD) researchers to examine tau pathology in vivo and more effectively characterize the disease trajectories of AD. Current tau PET analysis methods, however, typically perform inferences on large cortical ROIs and are limited in the detection of localized tau pathology that varies across subjects. 
Furthermore, a high-resolution MRI is required to carry out conventional tau PET analysis, which is not commonly acquired in clinical practices and may not be acquired for many elderly patients with dementia due to strong motion artifacts, claustrophobia, or certain metal implants. In this work, we propose a novel conditional diffusion model to perform MRI-free anomaly detection from tau PET imaging data. By including individualized conditions and two complementary loss maps from pseudo-healthy and pseudo-unhealthy reconstructions, our model computes an anomaly map across the entire brain area that allows simply training a support vector machine (SVM) for classifying disease severity. We train our model on ADNI subjects (n=534) and evaluate its performance on a separate dataset from the preclinical subjects of the A4 clinical trial (n=447). We demonstrate that our method outperforms baseline generative models and the conventional Z-score-based method in anomaly localization without mis-detecting off-target bindings in sub-cortical and out-of-brain areas. By classifying the A4 subjects according to their anomaly map using the SVM trained on ADNI data, we show that our method can successfully group preclinical subjects with significantly different cognitive functions, which further demonstrates the effectiveness of our method in capturing biologically relevant anomaly in tau PET imaging. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2405.13199v1-abstract-full').style.display = 'none'; document.getElementById('2405.13199v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 21 May, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> May 2024. 
</p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2405.11401">arXiv:2405.11401</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2405.11401">pdf</a>, <a href="https://arxiv.org/format/2405.11401">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Systems and Control">eess.SY</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computational Engineering, Finance, and Science">cs.CE</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Optimization and Control">math.OC</span> </div> </div> <p class="title is-5 mathjax"> PDE Control Gym: A Benchmark for Data-Driven Boundary Control of Partial Differential Equations </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&amp;query=Bhan%2C+L">Luke Bhan</a>, <a href="/search/eess?searchtype=author&amp;query=Bian%2C+Y">Yuexin Bian</a>, <a href="/search/eess?searchtype=author&amp;query=Krstic%2C+M">Miroslav Krstic</a>, <a href="/search/eess?searchtype=author&amp;query=Shi%2C+Y">Yuanyuan Shi</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2405.11401v2-abstract-short" style="display: inline;"> Over the last decade, data-driven methods have surged in popularity, emerging as valuable tools for control theory. As such, neural network approximations of control feedback laws, system dynamics, and even Lyapunov functions have attracted growing attention. 
With the ascent of learning based control, the need for accurate, fast, and easy-to-use benchmarks has increased. In this work, we present t&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2405.11401v2-abstract-full').style.display = 'inline'; document.getElementById('2405.11401v2-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2405.11401v2-abstract-full" style="display: none;"> Over the last decade, data-driven methods have surged in popularity, emerging as valuable tools for control theory. As such, neural network approximations of control feedback laws, system dynamics, and even Lyapunov functions have attracted growing attention. With the ascent of learning based control, the need for accurate, fast, and easy-to-use benchmarks has increased. In this work, we present the first learning-based environment for boundary control of PDEs. In our benchmark, we introduce three foundational PDE problems - a 1D transport PDE, a 1D reaction-diffusion PDE, and a 2D Navier-Stokes PDE - whose solvers are bundled in an user-friendly reinforcement learning gym. With this gym, we then present the first set of model-free, reinforcement learning algorithms for solving this series of benchmark problems, achieving stability, although at a higher cost compared to model-based PDE backstepping. With the set of benchmark environments and detailed examples, this work significantly lowers the barrier to entry for learning-based PDE control - a topic largely unexplored by the data-driven control community. The entire benchmark is available on Github along with detailed documentation and the presented reinforcement learning models are open sourced. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2405.11401v2-abstract-full').style.display = 'none'; document.getElementById('2405.11401v2-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 23 May, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 18 May, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> May 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">26 pages 10 figures. Accepted L4DC 2024</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2405.09004">arXiv:2405.09004</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2405.09004">pdf</a>, <a href="https://arxiv.org/format/2405.09004">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Systems and Control">eess.SY</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> Improving Sequential Market Clearing via Value-oriented Renewable Energy Forecasting </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&amp;query=Zhang%2C+Y">Yufan Zhang</a>, <a href="/search/eess?searchtype=author&amp;query=Wen%2C+H">Honglin Wen</a>, <a href="/search/eess?searchtype=author&amp;query=Bian%2C+Y">Yuexin Bian</a>, <a href="/search/eess?searchtype=author&amp;query=Shi%2C+Y">Yuanyuan Shi</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" 
id="2405.09004v1-abstract-short" style="display: inline;"> Large penetration of renewable energy sources (RESs) brings huge uncertainty into the electricity markets. While existing deterministic market clearing fails to accommodate the uncertainty, the recently proposed stochastic market clearing struggles to achieve desirable market properties. In this work, we propose a value-oriented forecasting approach, which tactically determines the RESs generation&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2405.09004v1-abstract-full').style.display = 'inline'; document.getElementById('2405.09004v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2405.09004v1-abstract-full" style="display: none;"> Large penetration of renewable energy sources (RESs) brings huge uncertainty into the electricity markets. While existing deterministic market clearing fails to accommodate the uncertainty, the recently proposed stochastic market clearing struggles to achieve desirable market properties. In this work, we propose a value-oriented forecasting approach, which tactically determines the RESs generation that enters the day-ahead market. With such a forecast, the existing deterministic market clearing framework can be maintained, and the day-ahead and real-time overall operation cost is reduced. At the training phase, the forecast model parameters are estimated to minimize expected day-ahead and real-time overall operation costs, instead of minimizing forecast errors in a statistical sense. Theoretically, we derive the exact form of the loss function for training the forecast model that aligns with such a goal. For market clearing modeled by linear programs, this loss function is a piecewise linear function. Additionally, we derive the analytical gradient of the loss function with respect to the forecast, which inspires an efficient training strategy. 
A numerical study shows our forecasts can bring significant benefits of the overall cost reduction to deterministic market clearing, compared to quality-oriented forecasting approach. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2405.09004v1-abstract-full').style.display = 'none'; document.getElementById('2405.09004v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 14 May, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> May 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2405.04285">arXiv:2405.04285</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2405.04285">pdf</a>, <a href="https://arxiv.org/format/2405.04285">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Signal Processing">eess.SP</span> </div> </div> <p class="title is-5 mathjax"> On the Foundations of Earth and Climate Foundation Models </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&amp;query=Zhu%2C+X+X">Xiao Xiang Zhu</a>, <a href="/search/eess?searchtype=author&amp;query=Xiong%2C+Z">Zhitong Xiong</a>, <a href="/search/eess?searchtype=author&amp;query=Wang%2C+Y">Yi Wang</a>, <a href="/search/eess?searchtype=author&amp;query=Stewart%2C+A+J">Adam J. 
Stewart</a>, <a href="/search/eess?searchtype=author&amp;query=Heidler%2C+K">Konrad Heidler</a>, <a href="/search/eess?searchtype=author&amp;query=Wang%2C+Y">Yuanyuan Wang</a>, <a href="/search/eess?searchtype=author&amp;query=Yuan%2C+Z">Zhenghang Yuan</a>, <a href="/search/eess?searchtype=author&amp;query=Dujardin%2C+T">Thomas Dujardin</a>, <a href="/search/eess?searchtype=author&amp;query=Xu%2C+Q">Qingsong Xu</a>, <a href="/search/eess?searchtype=author&amp;query=Shi%2C+Y">Yilei Shi</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2405.04285v1-abstract-short" style="display: inline;"> Foundation models have enormous potential in advancing Earth and climate sciences, however, current approaches may not be optimal as they focus on a few basic features of a desirable Earth and climate foundation model. Crafting the ideal Earth foundation model, we define eleven features which would allow such a foundation model to be beneficial for any geoscientific downstream application in an en&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2405.04285v1-abstract-full').style.display = 'inline'; document.getElementById('2405.04285v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2405.04285v1-abstract-full" style="display: none;"> Foundation models have enormous potential in advancing Earth and climate sciences, however, current approaches may not be optimal as they focus on a few basic features of a desirable Earth and climate foundation model. 
Crafting the ideal Earth foundation model, we define eleven features which would allow such a foundation model to be beneficial for any geoscientific downstream application in an environmental- and human-centric manner. We further shed light on the way forward to achieve the ideal model and to evaluate Earth foundation models. What comes after foundation models? Energy efficient adaptation, adversarial defenses, and interpretability are among the emerging directions. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2405.04285v1-abstract-full').style.display = 'none'; document.getElementById('2405.04285v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 7 May, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> May 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2404.15585">arXiv:2404.15585</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2404.15585">pdf</a>, <a href="https://arxiv.org/format/2404.15585">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Image and Video Processing">eess.IV</span> </div> </div> <p class="title is-5 mathjax"> Brain Storm Optimization Based Swarm Learning for Diabetic Retinopathy Image Classification </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&amp;query=Qu%2C+L">Liang Qu</a>, <a href="/search/eess?searchtype=author&amp;query=Wang%2C+C">Cunze Wang</a>, <a href="/search/eess?searchtype=author&amp;query=Shi%2C+Y">Yuhui Shi</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis 
has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2404.15585v1-abstract-short" style="display: inline;"> The application of deep learning techniques to medical problems has garnered widespread research interest in recent years, such as applying convolutional neural networks to medical image classification tasks. However, data in the medical field is often highly private, preventing different hospitals from sharing data to train an accurate model. Federated learning, as a privacy-preserving machine le&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2404.15585v1-abstract-full').style.display = 'inline'; document.getElementById('2404.15585v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2404.15585v1-abstract-full" style="display: none;"> The application of deep learning techniques to medical problems has garnered widespread research interest in recent years, such as applying convolutional neural networks to medical image classification tasks. However, data in the medical field is often highly private, preventing different hospitals from sharing data to train an accurate model. Federated learning, as a privacy-preserving machine learning architecture, has shown promising performance in balancing data privacy and model utility by keeping private data on the client&#39;s side and using a central server to coordinate a set of clients for model training through aggregating their uploaded model parameters. Yet, this architecture heavily relies on a trusted third-party server, which is challenging to achieve in real life. Swarm learning, as a specialized decentralized federated learning architecture that does not require a central server, utilizes blockchain technology to enable direct parameter exchanges between clients. 
However, the mining of blocks requires significant computational resources, limiting its scalability. To address this issue, this paper integrates the brain storm optimization algorithm into the swarm learning framework, named BSO-SL. This approach clusters similar clients into different groups based on their model distributions. Additionally, leveraging the architecture of BSO, clients are given the probability to engage in collaborative learning both within their cluster and with clients outside their cluster, preventing the model from converging to local optima. The proposed method has been validated on a real-world diabetic retinopathy image classification dataset, and the experimental results demonstrate the effectiveness of the proposed approach. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2404.15585v1-abstract-full').style.display = 'none'; document.getElementById('2404.15585v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 23 April, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> April 2024. 
</p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2404.06007">arXiv:2404.06007</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2404.06007">pdf</a>, <a href="https://arxiv.org/format/2404.06007">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Information Theory">cs.IT</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Signal Processing">eess.SP</span> </div> </div> <p class="title is-5 mathjax"> Collaborative Edge AI Inference over Cloud-RAN </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&amp;query=Zhang%2C+P">Pengfei Zhang</a>, <a href="/search/eess?searchtype=author&amp;query=Wen%2C+D">Dingzhu Wen</a>, <a href="/search/eess?searchtype=author&amp;query=Zhu%2C+G">Guangxu Zhu</a>, <a href="/search/eess?searchtype=author&amp;query=Chen%2C+Q">Qimei Chen</a>, <a href="/search/eess?searchtype=author&amp;query=Han%2C+K">Kaifeng Han</a>, <a href="/search/eess?searchtype=author&amp;query=Shi%2C+Y">Yuanming Shi</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2404.06007v1-abstract-short" style="display: inline;"> In this paper, a cloud radio access network (Cloud-RAN) based collaborative edge AI inference architecture is proposed. Specifically, geographically distributed devices capture real-time noise-corrupted sensory data samples and extract the noisy local feature vectors, which are then aggregated at each remote radio head (RRH) to suppress sensing noise. 
To realize efficient uplink feature aggregatio&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2404.06007v1-abstract-full').style.display = 'inline'; document.getElementById('2404.06007v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2404.06007v1-abstract-full" style="display: none;"> In this paper, a cloud radio access network (Cloud-RAN) based collaborative edge AI inference architecture is proposed. Specifically, geographically distributed devices capture real-time noise-corrupted sensory data samples and extract the noisy local feature vectors, which are then aggregated at each remote radio head (RRH) to suppress sensing noise. To realize efficient uplink feature aggregation, we allow each RRH to receive local feature vectors from all devices over the same resource blocks simultaneously by leveraging an over-the-air computation (AirComp) technique. Thereafter, these aggregated feature vectors are quantized and transmitted to a central processor (CP) for further aggregation and downstream inference tasks. Our aim in this work is to maximize the inference accuracy via a surrogate accuracy metric called discriminant gain, which measures the discernibility of different classes in the feature space. The key challenges lie in simultaneously suppressing the coupled sensing noise, AirComp distortion caused by hostile wireless channels, and the quantization error resulting from the limited capacity of fronthaul links. To address these challenges, this work proposes a joint transmit precoding, receive beamforming, and quantization error control scheme to enhance the inference accuracy. Extensive numerical experiments demonstrate the effectiveness and superiority of our proposed optimization algorithm compared to various baselines. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2404.06007v1-abstract-full').style.display = 'none'; document.getElementById('2404.06007v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 9 April, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> April 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">This paper is accepted by IEEE Transactions on Communications on 08-Apr-2024</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2404.01875">arXiv:2404.01875</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2404.01875">pdf</a>, <a href="https://arxiv.org/format/2404.01875">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Signal Processing">eess.SP</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Distributed, Parallel, and Cluster Computing">cs.DC</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Information Theory">cs.IT</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> Satellite Federated Edge Learning: Architecture Design and Convergence Analysis </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&amp;query=Shi%2C+Y">Yuanming Shi</a>, <a href="/search/eess?searchtype=author&amp;query=Zeng%2C+L">Li Zeng</a>, <a href="/search/eess?searchtype=author&amp;query=Zhu%2C+J">Jingyang Zhu</a>, <a href="/search/eess?searchtype=author&amp;query=Zhou%2C+Y">Yong Zhou</a>, <a 
href="/search/eess?searchtype=author&amp;query=Jiang%2C+C">Chunxiao Jiang</a>, <a href="/search/eess?searchtype=author&amp;query=Letaief%2C+K+B">Khaled B. Letaief</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2404.01875v1-abstract-short" style="display: inline;"> The proliferation of low-earth-orbit (LEO) satellite networks leads to the generation of vast volumes of remote sensing data which is traditionally transferred to the ground server for centralized processing, raising privacy and bandwidth concerns. Federated edge learning (FEEL), as a distributed machine learning approach, has the potential to address these challenges by sharing only model paramet&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2404.01875v1-abstract-full').style.display = 'inline'; document.getElementById('2404.01875v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2404.01875v1-abstract-full" style="display: none;"> The proliferation of low-earth-orbit (LEO) satellite networks leads to the generation of vast volumes of remote sensing data which is traditionally transferred to the ground server for centralized processing, raising privacy and bandwidth concerns. Federated edge learning (FEEL), as a distributed machine learning approach, has the potential to address these challenges by sharing only model parameters instead of raw data. Although promising, the dynamics of LEO networks, characterized by the high mobility of satellites and short ground-to-satellite link (GSL) duration, pose unique challenges for FEEL. Notably, frequent model transmission between the satellites and ground incurs prolonged waiting time and large transmission latency. 
This paper introduces a novel FEEL algorithm, named FEDMEGA, tailored to LEO mega-constellation networks. By integrating inter-satellite links (ISL) for intra-orbit model aggregation, the proposed algorithm significantly reduces the usage of low data rate and intermittent GSL. Our proposed method includes a ring all-reduce based intra-orbit aggregation mechanism, coupled with a network flow-based transmission scheme for global model aggregation, which enhances transmission efficiency. Theoretical convergence analysis is provided to characterize the algorithm performance. Extensive simulations show that our FEDMEGA algorithm outperforms existing satellite FEEL algorithms, exhibiting an approximate 30% improvement in convergence rate. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2404.01875v1-abstract-full').style.display = 'none'; document.getElementById('2404.01875v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 2 April, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> April 2024. 
</p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">16 pages, 15 figures</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2403.16402">arXiv:2403.16402</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2403.16402">pdf</a>, <a href="https://arxiv.org/format/2403.16402">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Systems and Control">eess.SY</span> </div> </div> <p class="title is-5 mathjax"> A Distributionally Robust Model Predictive Control for Static and Dynamic Uncertainties in Smart Grids </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&amp;query=Li%2C+Q">Qi Li</a>, <a href="/search/eess?searchtype=author&amp;query=Shi%2C+Y">Ye Shi</a>, <a href="/search/eess?searchtype=author&amp;query=Jiang%2C+Y">Yuning Jiang</a>, <a href="/search/eess?searchtype=author&amp;query=Shi%2C+Y">Yuanming Shi</a>, <a href="/search/eess?searchtype=author&amp;query=Wang%2C+H">Haoyu Wang</a>, <a href="/search/eess?searchtype=author&amp;query=Poor%2C+H+V">H. Vincent Poor</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2403.16402v1-abstract-short" style="display: inline;"> The integration of various power sources, including renewables and electric vehicles, into smart grids is expanding, introducing uncertainties that can result in issues like voltage imbalances, load fluctuations, and power losses. These challenges negatively impact the reliability and stability of online scheduling in smart grids. 
Existing research often addresses uncertainties affecting current s&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2403.16402v1-abstract-full').style.display = 'inline'; document.getElementById('2403.16402v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2403.16402v1-abstract-full" style="display: none;"> The integration of various power sources, including renewables and electric vehicles, into smart grids is expanding, introducing uncertainties that can result in issues like voltage imbalances, load fluctuations, and power losses. These challenges negatively impact the reliability and stability of online scheduling in smart grids. Existing research often addresses uncertainties affecting current states but overlooks those that impact future states, such as the unpredictable charging patterns of electric vehicles. To distinguish between these, we term them static uncertainties and dynamic uncertainties, respectively. This paper introduces WDR-MPC, a novel approach that stands for two-stage Wasserstein-based Distributionally Robust (WDR) optimization within a Model Predictive Control (MPC) framework, aimed at effectively managing both types of uncertainties in smart grids. The dynamic uncertainties are first reformulated into ambiguity tubes and then the distributionally robust bounds of both dynamic and static uncertainties can be established using WDR optimization. By employing ambiguity tubes and WDR optimization, the stochastic MPC system is converted into a nominal one. Moreover, we develop a convex reformulation method to speed up WDR computation during the two-stage optimization. The distinctive contribution of this paper lies in its holistic approach to both static and dynamic uncertainties in smart grids. 
Comprehensive experiment results on IEEE 38-bus and 94-bus systems reveal the method&#39;s superior performance and the potential to enhance grid stability and reliability. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2403.16402v1-abstract-full').style.display = 'none'; document.getElementById('2403.16402v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 24 March, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> March 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2403.11870">arXiv:2403.11870</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2403.11870">pdf</a>, <a href="https://arxiv.org/format/2403.11870">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Image and Video Processing">eess.IV</span> </div> <div class="is-inline-block" style="margin-left: 0.5rem"> <div class="tags has-addons"> <span class="tag is-dark is-size-7">doi</span> <span class="tag is-light is-size-7"><a class="" href="https://doi.org/10.1109/TGRS.2024.3378720">10.1109/TGRS.2024.3378720 <i class="fa fa-external-link" aria-hidden="true"></i></a></span> </div> </div> </div> <p class="title is-5 mathjax"> IDF-CR: Iterative Diffusion Process for Divide-and-Conquer Cloud Removal in Remote-sensing Images </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&amp;query=Wang%2C+M">Meilin Wang</a>, <a href="/search/eess?searchtype=author&amp;query=Song%2C+Y">Yexing Song</a>, <a href="/search/eess?searchtype=author&amp;query=Wei%2C+P">Pengxu Wei</a>, <a 
href="/search/eess?searchtype=author&amp;query=Xian%2C+X">Xiaoyu Xian</a>, <a href="/search/eess?searchtype=author&amp;query=Shi%2C+Y">Yukai Shi</a>, <a href="/search/eess?searchtype=author&amp;query=Lin%2C+L">Liang Lin</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2403.11870v1-abstract-short" style="display: inline;"> Deep learning technologies have demonstrated their effectiveness in removing cloud cover from optical remote-sensing images. Convolutional Neural Networks (CNNs) exert dominance in the cloud removal tasks. However, constrained by the inherent limitations of convolutional operations, CNNs can address only a modest fraction of cloud occlusion. In recent years, diffusion models have achieved state-of&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2403.11870v1-abstract-full').style.display = 'inline'; document.getElementById('2403.11870v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2403.11870v1-abstract-full" style="display: none;"> Deep learning technologies have demonstrated their effectiveness in removing cloud cover from optical remote-sensing images. Convolutional Neural Networks (CNNs) exert dominance in the cloud removal tasks. However, constrained by the inherent limitations of convolutional operations, CNNs can address only a modest fraction of cloud occlusion. In recent years, diffusion models have achieved state-of-the-art (SOTA) proficiency in image generation and reconstruction due to their formidable generative capabilities. Inspired by the rapid development of diffusion models, we first present an iterative diffusion process for cloud removal (IDF-CR), which exhibits strong generative capabilities to achieve component divide-and-conquer cloud removal. 
IDF-CR consists of a pixel space cloud removal module (Pixel-CR) and a latent space iterative noise diffusion network (IND). Specifically, IDF-CR is divided into two-stage models that address pixel space and latent space. The two-stage model facilitates a strategic transition from preliminary cloud reduction to meticulous detail refinement. In the pixel space stage, Pixel-CR initiates the processing of cloudy images, yielding a suboptimal cloud removal prior to providing the diffusion model with prior cloud removal knowledge. In the latent space stage, the diffusion model transforms low-quality cloud removal into high-quality clean output. We refine the Stable Diffusion by implementing ControlNet. In addition, an unsupervised iterative noise refinement (INR) module is introduced for diffusion model to optimize the distribution of the predicted noise, thereby enhancing advanced detail recovery. Our model performs best with other SOTA methods, including image reconstruction and optical remote-sensing cloud removal on the optical remote-sensing datasets. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2403.11870v1-abstract-full').style.display = 'none'; document.getElementById('2403.11870v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 18 March, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> March 2024. 
</p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Accepted by IEEE TGRS, we first present an iterative diffusion process for cloud removal, the code is available at: https://github.com/SongYxing/IDF-CR</span> </p> </li> </ol> <nav class="pagination is-small is-centered breathe-horizontal" role="navigation" aria-label="pagination"> <a href="" class="pagination-previous is-invisible">Previous </a> <a href="/search/?searchtype=author&amp;query=Shi%2C+Y&amp;start=50" class="pagination-next" >Next </a> <ul class="pagination-list"> <li> <a href="/search/?searchtype=author&amp;query=Shi%2C+Y&amp;start=0" class="pagination-link is-current" aria-label="Goto page 1" aria-current="page">1 </a> </li> <li> <a href="/search/?searchtype=author&amp;query=Shi%2C+Y&amp;start=50" class="pagination-link " aria-label="Page 2">2 </a> </li> <li> <a href="/search/?searchtype=author&amp;query=Shi%2C+Y&amp;start=100" class="pagination-link " aria-label="Page 3">3 </a> </li> <li> <a href="/search/?searchtype=author&amp;query=Shi%2C+Y&amp;start=150" class="pagination-link " aria-label="Page 4">4 </a> </li> <li> <a href="/search/?searchtype=author&amp;query=Shi%2C+Y&amp;start=200" class="pagination-link " aria-label="Page 5">5 </a> </li> <li><span class="pagination-ellipsis">&hellip;</span></li> </ul> </nav> <div class="is-hidden-tablet"> <!-- feedback for mobile only --> <span class="help" style="display: inline-block;"><a href="https://github.com/arXiv/arxiv-search/releases">Search v0.5.6 released 2020-02-24</a>&nbsp;&nbsp;</span> </div> </div> </main> <footer> <div class="columns is-desktop" role="navigation" aria-label="Secondary"> <!-- MetaColumn 1 --> <div class="column"> <div class="columns"> <div class="column"> <ul class="nav-spaced"> <li><a href="https://info.arxiv.org/about">About</a></li> <li><a 
href="https://info.arxiv.org/help">Help</a></li> </ul> </div> <div class="column"> <ul class="nav-spaced"> <li> <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 512 512" class="icon filter-black" role="presentation"><title>contact arXiv</title><desc>Click here to contact arXiv</desc><path d="M502.3 190.8c3.9-3.1 9.7-.2 9.7 4.7V400c0 26.5-21.5 48-48 48H48c-26.5 0-48-21.5-48-48V195.6c0-5 5.7-7.8 9.7-4.7 22.4 17.4 52.1 39.5 154.1 113.6 21.1 15.4 56.7 47.8 92.2 47.6 35.7.3 72-32.8 92.3-47.6 102-74.1 131.6-96.3 154-113.7zM256 320c23.2.4 56.6-29.2 73.4-41.4 132.7-96.3 142.8-104.7 173.4-128.7 5.8-4.5 9.2-11.5 9.2-18.9v-19c0-26.5-21.5-48-48-48H48C21.5 64 0 85.5 0 112v19c0 7.4 3.4 14.3 9.2 18.9 30.6 23.9 40.7 32.4 173.4 128.7 16.8 12.2 50.2 41.8 73.4 41.4z"/></svg> <a href="https://info.arxiv.org/help/contact.html"> Contact</a> </li> <li> <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 512 512" class="icon filter-black" role="presentation"><title>subscribe to arXiv mailings</title><desc>Click here to subscribe</desc><path d="M476 3.2L12.5 270.6c-18.1 10.4-15.8 35.6 2.2 43.2L121 358.4l287.3-253.2c5.5-4.9 13.3 2.6 8.6 8.3L176 407v80.5c0 23.6 28.5 32.9 42.5 15.8L282 426l124.6 52.2c14.2 6 30.4-2.9 33-18.2l72-432C515 7.8 493.3-6.8 476 3.2z"/></svg> <a href="https://info.arxiv.org/help/subscribe"> Subscribe</a> </li> </ul> </div> </div> </div> <!-- end MetaColumn 1 --> <!-- MetaColumn 2 --> <div class="column"> <div class="columns"> <div class="column"> <ul class="nav-spaced"> <li><a href="https://info.arxiv.org/help/license/index.html">Copyright</a></li> <li><a href="https://info.arxiv.org/help/policies/privacy_policy.html">Privacy Policy</a></li> </ul> </div> <div class="column sorry-app-links"> <ul class="nav-spaced"> <li><a href="https://info.arxiv.org/help/web_accessibility.html">Web Accessibility Assistance</a></li> <li> <p class="help"> <a class="a11y-main-link" href="https://status.arxiv.org" target="_blank">arXiv Operational Status <svg 
xmlns="http://www.w3.org/2000/svg" viewBox="0 0 256 512" class="icon filter-dark_grey" role="presentation"><path d="M224.3 273l-136 136c-9.4 9.4-24.6 9.4-33.9 0l-22.6-22.6c-9.4-9.4-9.4-24.6 0-33.9l96.4-96.4-96.4-96.4c-9.4-9.4-9.4-24.6 0-33.9L54.3 103c9.4-9.4 24.6-9.4 33.9 0l136 136c9.5 9.4 9.5 24.6.1 34z"/></svg></a><br> Get status notifications via <a class="is-link" href="https://subscribe.sorryapp.com/24846f03/email/new" target="_blank"><svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 512 512" class="icon filter-black" role="presentation"><path d="M502.3 190.8c3.9-3.1 9.7-.2 9.7 4.7V400c0 26.5-21.5 48-48 48H48c-26.5 0-48-21.5-48-48V195.6c0-5 5.7-7.8 9.7-4.7 22.4 17.4 52.1 39.5 154.1 113.6 21.1 15.4 56.7 47.8 92.2 47.6 35.7.3 72-32.8 92.3-47.6 102-74.1 131.6-96.3 154-113.7zM256 320c23.2.4 56.6-29.2 73.4-41.4 132.7-96.3 142.8-104.7 173.4-128.7 5.8-4.5 9.2-11.5 9.2-18.9v-19c0-26.5-21.5-48-48-48H48C21.5 64 0 85.5 0 112v19c0 7.4 3.4 14.3 9.2 18.9 30.6 23.9 40.7 32.4 173.4 128.7 16.8 12.2 50.2 41.8 73.4 41.4z"/></svg>email</a> or <a class="is-link" href="https://subscribe.sorryapp.com/24846f03/slack/new" target="_blank"><svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 448 512" class="icon filter-black" role="presentation"><path d="M94.12 315.1c0 25.9-21.16 47.06-47.06 47.06S0 341 0 315.1c0-25.9 21.16-47.06 47.06-47.06h47.06v47.06zm23.72 0c0-25.9 21.16-47.06 47.06-47.06s47.06 21.16 47.06 47.06v117.84c0 25.9-21.16 47.06-47.06 47.06s-47.06-21.16-47.06-47.06V315.1zm47.06-188.98c-25.9 0-47.06-21.16-47.06-47.06S139 32 164.9 32s47.06 21.16 47.06 47.06v47.06H164.9zm0 23.72c25.9 0 47.06 21.16 47.06 47.06s-21.16 47.06-47.06 47.06H47.06C21.16 243.96 0 222.8 0 196.9s21.16-47.06 47.06-47.06H164.9zm188.98 47.06c0-25.9 21.16-47.06 47.06-47.06 25.9 0 47.06 21.16 47.06 47.06s-21.16 47.06-47.06 47.06h-47.06V196.9zm-23.72 0c0 25.9-21.16 47.06-47.06 47.06-25.9 0-47.06-21.16-47.06-47.06V79.06c0-25.9 21.16-47.06 47.06-47.06 25.9 0 47.06 21.16 47.06 47.06V196.9zM283.1 385.88c25.9 0 
47.06 21.16 47.06 47.06 0 25.9-21.16 47.06-47.06 47.06-25.9 0-47.06-21.16-47.06-47.06v-47.06h47.06zm0-23.72c-25.9 0-47.06-21.16-47.06-47.06 0-25.9 21.16-47.06 47.06-47.06h117.84c25.9 0 47.06 21.16 47.06 47.06 0 25.9-21.16 47.06-47.06 47.06H283.1z"/></svg>slack</a> </p> </li> </ul> </div> </div> </div> <!-- end MetaColumn 2 --> </div> </footer> <script src="https://static.arxiv.org/static/base/1.0.0a5/js/member_acknowledgement.js"></script> </body> </html>

Pages: 1 2 3 4 5 6 7 8 9 10