CINXE.COM
Search | arXiv e-print repository
<!DOCTYPE html> <html lang="en"> <head> <meta charset="utf-8"/> <meta name="viewport" content="width=device-width, initial-scale=1"/> <!-- new favicon config and versions by realfavicongenerator.net --> <link rel="apple-touch-icon" sizes="180x180" href="https://static.arxiv.org/static/base/1.0.0a5/images/icons/apple-touch-icon.png"> <link rel="icon" type="image/png" sizes="32x32" href="https://static.arxiv.org/static/base/1.0.0a5/images/icons/favicon-32x32.png"> <link rel="icon" type="image/png" sizes="16x16" href="https://static.arxiv.org/static/base/1.0.0a5/images/icons/favicon-16x16.png"> <link rel="manifest" href="https://static.arxiv.org/static/base/1.0.0a5/images/icons/site.webmanifest"> <link rel="mask-icon" href="https://static.arxiv.org/static/base/1.0.0a5/images/icons/safari-pinned-tab.svg" color="#b31b1b"> <link rel="shortcut icon" href="https://static.arxiv.org/static/base/1.0.0a5/images/icons/favicon.ico"> <meta name="msapplication-TileColor" content="#b31b1b"> <meta name="msapplication-config" content="images/icons/browserconfig.xml"> <meta name="theme-color" content="#b31b1b"> <!-- end favicon config --> <title>Search | arXiv e-print repository</title> <script defer src="https://static.arxiv.org/static/base/1.0.0a5/fontawesome-free-5.11.2-web/js/all.js"></script> <link rel="stylesheet" href="https://static.arxiv.org/static/base/1.0.0a5/css/arxivstyle.css" /> <script type="text/x-mathjax-config"> MathJax.Hub.Config({ messageStyle: "none", extensions: ["tex2jax.js"], jax: ["input/TeX", "output/HTML-CSS"], tex2jax: { inlineMath: [ ['$','$'], ["\\(","\\)"] ], displayMath: [ ['$$','$$'], ["\\[","\\]"] ], processEscapes: true, ignoreClass: '.*', processClass: 'mathjax.*' }, TeX: { extensions: ["AMSmath.js", "AMSsymbols.js", "noErrors.js"], noErrors: { inlineDelimiters: ["$","$"], multiLine: false, style: { "font-size": "normal", "border": "" } } }, "HTML-CSS": { availableFonts: ["TeX"] } }); </script> <script src='//static.arxiv.org/MathJax-2.7.3/MathJax.js'></script> <script src="https://static.arxiv.org/static/base/1.0.0a5/js/notification.js"></script> <link rel="stylesheet" href="https://static.arxiv.org/static/search/0.5.6/css/bulma-tooltip.min.css" /> <link rel="stylesheet" href="https://static.arxiv.org/static/search/0.5.6/css/search.css" /> <script src="https://code.jquery.com/jquery-3.2.1.slim.min.js" integrity="sha256-k2WSCIexGzOj3Euiig+TlR8gA0EmPjuc79OEeY5L45g=" crossorigin="anonymous"></script> <script src="https://static.arxiv.org/static/search/0.5.6/js/fieldset.js"></script> <style> radio#cf-customfield_11400 { display: none; } </style> </head> <body> <header><a href="#main-container" class="is-sr-only">Skip to main content</a> <!-- contains Cornell logo and sponsor statement --> <div class="attribution level is-marginless" role="banner"> <div class="level-left"> <a class="level-item" href="https://cornell.edu/"><img src="https://static.arxiv.org/static/base/1.0.0a5/images/cornell-reduced-white-SMALL.svg" alt="Cornell University" width="200" aria-label="logo" /></a> </div> <div class="level-right is-marginless"><p class="sponsors level-item is-marginless"><span id="support-ack-url">We gratefully acknowledge support from<br /> the Simons Foundation, <a href="https://info.arxiv.org/about/ourmembers.html">member institutions</a>, and all contributors. <a href="https://info.arxiv.org/about/donate.html">Donate</a></span></p></div> </div> <!-- contains arXiv identity and search bar --> <div class="identity level is-marginless"> <div class="level-left"> <div class="level-item"> <a class="arxiv" href="https://arxiv.org/" aria-label="arxiv-logo"> <img src="https://static.arxiv.org/static/base/1.0.0a5/images/arxiv-logo-one-color-white.svg" aria-label="logo" alt="arxiv logo" width="85" style="width:85px;"/> </a> </div> </div> <div class="search-block level-right"> <form class="level-item mini-search" method="GET" action="https://arxiv.org/search"> <div class="field has-addons"> <div class="control"> <input class="input is-small" type="text" name="query" placeholder="Search..." aria-label="Search term or terms" /> <p class="help"><a href="https://info.arxiv.org/help">Help</a> | <a href="https://arxiv.org/search/advanced">Advanced Search</a></p> </div> <div class="control"> <div class="select is-small"> <select name="searchtype" aria-label="Field to search"> <option value="all" selected="selected">All fields</option> <option value="title">Title</option> <option value="author">Author</option> <option value="abstract">Abstract</option> <option value="comments">Comments</option> <option value="journal_ref">Journal reference</option> <option value="acm_class">ACM classification</option> <option value="msc_class">MSC classification</option> <option value="report_num">Report number</option> <option value="paper_id">arXiv identifier</option> <option value="doi">DOI</option> <option value="orcid">ORCID</option> <option value="author_id">arXiv author ID</option> <option value="help">Help pages</option> <option value="full_text">Full text</option> </select> </div> </div> <input type="hidden" name="source" value="header"> <button class="button is-small is-cul-darker">Search</button> </div> </form> </div> </div> <!-- closes identity --> <div class="container"> <div class="user-tools is-size-7 has-text-right has-text-weight-bold" role="navigation" aria-label="User menu"> <a href="https://arxiv.org/login">Login</a> </div> </div> </header> <main class="container" id="main-container"> <div class="level is-marginless"> <div class="level-left"> <h1 class="title is-clearfix"> Showing 1–50 of 1,219 results for author: <span class="mathjax">Chen, Y</span> </h1> </div> <div class="level-right is-hidden-mobile"> <!-- feedback for mobile is moved to footer --> <span class="help" style="display: inline-block;"><a href="https://github.com/arXiv/arxiv-search/releases">Search v0.5.6 released 2020-02-24</a> </span> </div> </div> <div class="content"> <form method="GET" action="/search/eess" aria-role="search"> Searching in archive <strong>eess</strong>. <a href="/search/?searchtype=author&query=Chen%2C+Y">Search in all archives.</a> <div class="field has-addons-tablet"> <div class="control is-expanded"> <label for="query" class="hidden-label">Search term or terms</label> <input class="input is-medium" id="query" name="query" placeholder="Search term..." type="text" value="Chen, Y"> </div> <div class="select control is-medium"> <label class="is-hidden" for="searchtype">Field</label> <select class="is-medium" id="searchtype" name="searchtype"><option value="all">All fields</option><option value="title">Title</option><option selected value="author">Author(s)</option><option value="abstract">Abstract</option><option value="comments">Comments</option><option value="journal_ref">Journal reference</option><option value="acm_class">ACM classification</option><option value="msc_class">MSC classification</option><option value="report_num">Report number</option><option value="paper_id">arXiv identifier</option><option value="doi">DOI</option><option value="orcid">ORCID</option><option value="license">License (URI)</option><option value="author_id">arXiv author ID</option><option value="help">Help pages</option><option value="full_text">Full text</option></select> </div> <div class="control"> <button class="button is-link is-medium">Search</button> </div> </div> <div class="field"> <div class="control is-size-7"> <label class="radio"> <input checked id="abstracts-0" name="abstracts" type="radio" value="show"> Show abstracts </label> <label class="radio"> <input id="abstracts-1" name="abstracts" type="radio" value="hide"> Hide abstracts </label> </div> </div> <div class="is-clearfix" style="height: 2.5em"> <div class="is-pulled-right"> <a href="/search/advanced?terms-0-term=Chen%2C+Y&terms-0-field=author&size=50&order=-announced_date_first">Advanced Search</a> </div> </div> <input type="hidden" name="order" value="-announced_date_first"> <input type="hidden" name="size" value="50"> </form> <div class="level breathe-horizontal"> <div class="level-left"> <form method="GET" action="/search/"> <div style="display: none;"> <select id="searchtype" name="searchtype"><option value="all">All fields</option><option value="title">Title</option><option selected value="author">Author(s)</option><option value="abstract">Abstract</option><option value="comments">Comments</option><option value="journal_ref">Journal reference</option><option value="acm_class">ACM classification</option><option value="msc_class">MSC classification</option><option value="report_num">Report number</option><option value="paper_id">arXiv identifier</option><option value="doi">DOI</option><option value="orcid">ORCID</option><option value="license">License (URI)</option><option value="author_id">arXiv author ID</option><option value="help">Help pages</option><option value="full_text">Full text</option></select> <input id="query" name="query" type="text" value="Chen, Y"> <ul id="abstracts"><li><input checked id="abstracts-0" name="abstracts" type="radio" value="show"> <label for="abstracts-0">Show abstracts</label></li><li><input id="abstracts-1" name="abstracts" type="radio" value="hide"> <label for="abstracts-1">Hide abstracts</label></li></ul> </div> <div class="box field is-grouped is-grouped-multiline level-item"> <div class="control"> <span class="select is-small"> <select id="size" name="size"><option value="25">25</option><option selected value="50">50</option><option value="100">100</option><option value="200">200</option></select> </span> <label for="size">results per page</label>. </div> <div class="control"> <label for="order">Sort results by</label> <span class="select is-small"> <select id="order" name="order"><option selected value="-announced_date_first">Announcement date (newest first)</option><option value="announced_date_first">Announcement date (oldest first)</option><option value="-submitted_date">Submission date (newest first)</option><option value="submitted_date">Submission date (oldest first)</option><option value="">Relevance</option></select> </span> </div> <div class="control"> <button class="button is-small is-link">Go</button> </div> </div> </form> </div> </div> <nav class="pagination is-small is-centered breathe-horizontal" role="navigation" aria-label="pagination"> <a href="" class="pagination-previous is-invisible">Previous </a> <a href="/search/?searchtype=author&query=Chen%2C+Y&start=50" class="pagination-next" >Next </a> <ul class="pagination-list"> <li> <a href="/search/?searchtype=author&query=Chen%2C+Y&start=0" class="pagination-link is-current" aria-label="Goto page 1">1 </a> </li> <li> <a href="/search/?searchtype=author&query=Chen%2C+Y&start=50" class="pagination-link " aria-label="Page 2" aria-current="page">2 </a> </li> <li> <a href="/search/?searchtype=author&query=Chen%2C+Y&start=100" class="pagination-link " aria-label="Page 3" aria-current="page">3 </a> </li> <li> <a href="/search/?searchtype=author&query=Chen%2C+Y&start=150" class="pagination-link " aria-label="Page 4" aria-current="page">4 </a> </li> <li> <a href="/search/?searchtype=author&query=Chen%2C+Y&start=200" class="pagination-link " aria-label="Page 5" aria-current="page">5 </a> </li> <li><span class="pagination-ellipsis">…</span></li> </ul> </nav> <ol class="breathe-horizontal" start="1"> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2411.14700">arXiv:2411.14700</a> <span> [<a href="https://arxiv.org/pdf/2411.14700">pdf</a>, <a href="https://arxiv.org/format/2411.14700">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Systems and Control">eess.SY</span> </div> <div class="is-inline-block" style="margin-left: 0.5rem"> <div class="tags has-addons"> <span class="tag is-dark is-size-7">doi</span> <span class="tag is-light is-size-7"><a class="" href="https://doi.org/10.1109/TSG.2023.3317590">10.1109/TSG.2023.3317590 <i class="fa fa-external-link" aria-hidden="true"></i></a></span> </div> </div> </div> <p class="title is-5 mathjax"> Optimal Energy Dispatch of Grid-Connected Electric Vehicle Considering Lithium Battery Electrochemical Model </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&query=Chen%2C+Y">Yuanbo Chen</a>, <a href="/search/eess?searchtype=author&query=Zheng%2C+K">Kedi Zheng</a>, <a href="/search/eess?searchtype=author&query=Gu%2C+Y">Yuxuan Gu</a>, <a href="/search/eess?searchtype=author&query=Wang%2C+J">Jianxiao Wang</a>, <a href="/search/eess?searchtype=author&query=Chen%2C+Q">Qixin Chen</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2411.14700v1-abstract-short" style="display: inline;"> The grid-connected electric vehicles (EVs) serve as a promising regulating resource in the distribution grid with Vehicle-to-Grid (V2G) facilities. In the day-ahead stage, electric vehicle batteries (EVBs) need to be precisely dispatched and controlled to ensure high efficiency and prevent degradation. This article focuses on considering a refined battery model, i.e. the electrochemical model (EM)… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.14700v1-abstract-full').style.display = 'inline'; document.getElementById('2411.14700v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2411.14700v1-abstract-full" style="display: none;"> The grid-connected electric vehicles (EVs) serve as a promising regulating resource in the distribution grid with Vehicle-to-Grid (V2G) facilities. In the day-ahead stage, electric vehicle batteries (EVBs) need to be precisely dispatched and controlled to ensure high efficiency and prevent degradation. This article focuses on considering a refined battery model, i.e. the electrochemical model (EM), in the optimal dispatch of the local energy system with high penetration of EVs which replenish energy through V2G-equipped charge station and battery swapping station (BSS). In this paper, to utilize the EM efficiently, recursive EVB constraints and a corresponding matrix-based state update method are proposed based on EM power characterization. The charging EV state distribution is profiled and a multi-layer BSS model along with binary aggregation is proposed, in order to overcome the computation complexity of combining the refined battery constraints with the mixed integer optimization. Finally, a local energy system scenario is investigated for evaluation. The efficiency and effectiveness of EM consideration are assessed from the perspective of both the system and battery. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.14700v1-abstract-full').style.display = 'none'; document.getElementById('2411.14700v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 21 November, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> November 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Paper accepted for IEEE Transactions on Smart Grid. Personal use of this material is permitted. Permission from IEEE must be obtained for all other uses</span> </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Journal ref:</span> IEEE Transactions on Smart Grid, vol. 15, no. 3, pp. 3000-3015, May 2024 </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2411.14525">arXiv:2411.14525</a> <span> [<a href="https://arxiv.org/pdf/2411.14525">pdf</a>, <a href="https://arxiv.org/format/2411.14525">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Image and Video Processing">eess.IV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> SegBook: A Simple Baseline and Cookbook for Volumetric Medical Image Segmentation </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&query=Ye%2C+J">Jin Ye</a>, <a href="/search/eess?searchtype=author&query=Chen%2C+Y">Ying Chen</a>, <a href="/search/eess?searchtype=author&query=Li%2C+Y">Yanjun Li</a>, <a href="/search/eess?searchtype=author&query=Wang%2C+H">Haoyu Wang</a>, <a href="/search/eess?searchtype=author&query=Deng%2C+Z">Zhongying Deng</a>, <a href="/search/eess?searchtype=author&query=Huang%2C+Z">Ziyan Huang</a>, <a href="/search/eess?searchtype=author&query=Su%2C+Y">Yanzhou Su</a>, <a href="/search/eess?searchtype=author&query=Ma%2C+C">Chenglong Ma</a>, <a href="/search/eess?searchtype=author&query=Ji%2C+Y">Yuanfeng Ji</a>, <a href="/search/eess?searchtype=author&query=He%2C+J">Junjun He</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2411.14525v1-abstract-short" style="display: inline;"> Computed Tomography (CT) is one of the most popular modalities for medical imaging. By far, CT images have contributed to the largest publicly available datasets for volumetric medical segmentation tasks, covering full-body anatomical structures. Large amounts of full-body CT images provide the opportunity to pre-train powerful models, e.g., STU-Net pre-trained in a supervised fashion, to segment… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.14525v1-abstract-full').style.display = 'inline'; document.getElementById('2411.14525v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2411.14525v1-abstract-full" style="display: none;"> Computed Tomography (CT) is one of the most popular modalities for medical imaging. By far, CT images have contributed to the largest publicly available datasets for volumetric medical segmentation tasks, covering full-body anatomical structures. Large amounts of full-body CT images provide the opportunity to pre-train powerful models, e.g., STU-Net pre-trained in a supervised fashion, to segment numerous anatomical structures. However, it remains unclear in which conditions these pre-trained models can be transferred to various downstream medical segmentation tasks, particularly segmenting the other modalities and diverse targets. To address this problem, a large-scale benchmark for comprehensive evaluation is crucial for finding these conditions. Thus, we collected 87 public datasets varying in modality, target, and sample size to evaluate the transfer ability of full-body CT pre-trained models. We then employed a representative model, STU-Net with multiple model scales, to conduct transfer learning across modalities and targets. Our experimental results show that (1) there may be a bottleneck effect concerning the dataset size in fine-tuning, with more improvement on both small- and large-scale datasets than medium-size ones. (2) Models pre-trained on full-body CT demonstrate effective modality transfer, adapting well to other modalities such as MRI. (3) Pre-training on the full-body CT not only supports strong performance in structure detection but also shows efficacy in lesion detection, showcasing adaptability across target tasks. We hope that this large-scale open evaluation of transfer learning can direct future research in volumetric medical image segmentation. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.14525v1-abstract-full').style.display = 'none'; document.getElementById('2411.14525v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 21 November, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> November 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2411.13602">arXiv:2411.13602</a> <span> [<a href="https://arxiv.org/pdf/2411.13602">pdf</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Image and Video Processing">eess.IV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> Large-scale cross-modality pretrained model enhances cardiovascular state estimation and cardiomyopathy detection from electrocardiograms: An AI system development and multi-center validation study </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&query=Ding%2C+Z">Zhengyao Ding</a>, <a href="/search/eess?searchtype=author&query=Hu%2C+Y">Yujian Hu</a>, <a href="/search/eess?searchtype=author&query=Xu%2C+Y">Youyao Xu</a>, <a href="/search/eess?searchtype=author&query=Zhao%2C+C">Chengchen Zhao</a>, <a href="/search/eess?searchtype=author&query=Li%2C+Z">Ziyu Li</a>, <a href="/search/eess?searchtype=author&query=Mao%2C+Y">Yiheng Mao</a>, <a href="/search/eess?searchtype=author&query=Li%2C+H">Haitao Li</a>, <a href="/search/eess?searchtype=author&query=Li%2C+Q">Qian Li</a>, <a href="/search/eess?searchtype=author&query=Wang%2C+J">Jing Wang</a>, <a href="/search/eess?searchtype=author&query=Chen%2C+Y">Yue Chen</a>, <a href="/search/eess?searchtype=author&query=Chen%2C+M">Mengjia Chen</a>, <a href="/search/eess?searchtype=author&query=Wang%2C+L">Longbo Wang</a>, <a href="/search/eess?searchtype=author&query=Chu%2C+X">Xuesen Chu</a>, <a href="/search/eess?searchtype=author&query=Pan%2C+W">Weichao Pan</a>, <a href="/search/eess?searchtype=author&query=Liu%2C+Z">Ziyi Liu</a>, <a href="/search/eess?searchtype=author&query=Wu%2C+F">Fei Wu</a>, <a href="/search/eess?searchtype=author&query=Zhang%2C+H">Hongkun Zhang</a>, <a href="/search/eess?searchtype=author&query=Chen%2C+T">Ting Chen</a>, <a href="/search/eess?searchtype=author&query=Huang%2C+Z">Zhengxing Huang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2411.13602v1-abstract-short" style="display: inline;"> Cardiovascular diseases (CVDs) present significant challenges for early and accurate diagnosis. While cardiac magnetic resonance imaging (CMR) is the gold standard for assessing cardiac function and diagnosing CVDs, its high cost and technical complexity limit accessibility. In contrast, electrocardiography (ECG) offers promise for large-scale early screening. This study introduces CardiacNets, an… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.13602v1-abstract-full').style.display = 'inline'; document.getElementById('2411.13602v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2411.13602v1-abstract-full" style="display: none;"> Cardiovascular diseases (CVDs) present significant challenges for early and accurate diagnosis. While cardiac magnetic resonance imaging (CMR) is the gold standard for assessing cardiac function and diagnosing CVDs, its high cost and technical complexity limit accessibility. In contrast, electrocardiography (ECG) offers promise for large-scale early screening. This study introduces CardiacNets, an innovative model that enhances ECG analysis by leveraging the diagnostic strengths of CMR through cross-modal contrastive learning and generative pretraining. CardiacNets serves two primary functions: (1) it evaluates detailed cardiac function indicators and screens for potential CVDs, including coronary artery disease, cardiomyopathy, pericarditis, heart failure and pulmonary hypertension, using ECG input; and (2) it enhances interpretability by generating high-quality CMR images from ECG data. We train and validate the proposed CardiacNets on two large-scale public datasets (the UK Biobank with 41,519 individuals and the MIMIC-IV-ECG comprising 501,172 samples) as well as three private datasets (FAHZU with 410 individuals, SAHZU with 464 individuals, and QPH with 338 individuals), and the findings demonstrate that CardiacNets consistently outperforms traditional ECG-only models, substantially improving screening accuracy. Furthermore, the generated CMR images provide valuable diagnostic support for physicians of all experience levels. This proof-of-concept study highlights how ECG can facilitate cross-modal insights into cardiac function assessment, paving the way for enhanced CVD screening and diagnosis at a population level. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.13602v1-abstract-full').style.display = 'none'; document.getElementById('2411.13602v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 19 November, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> November 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">23 pages, 8 figures</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2411.13577">arXiv:2411.13577</a> <span> [<a href="https://arxiv.org/pdf/2411.13577">pdf</a>, <a href="https://arxiv.org/format/2411.13577">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Audio and Speech Processing">eess.AS</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Multimedia">cs.MM</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Sound">cs.SD</span> </div> </div> <p class="title is-5 mathjax"> WavChat: A Survey of Spoken Dialogue Models </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&query=Ji%2C+S">Shengpeng Ji</a>, <a href="/search/eess?searchtype=author&query=Chen%2C+Y">Yifu Chen</a>, <a href="/search/eess?searchtype=author&query=Fang%2C+M">Minghui Fang</a>, <a href="/search/eess?searchtype=author&query=Zuo%2C+J">Jialong Zuo</a>, <a href="/search/eess?searchtype=author&query=Lu%2C+J">Jingyu Lu</a>, <a href="/search/eess?searchtype=author&query=Wang%2C+H">Hanting Wang</a>, <a href="/search/eess?searchtype=author&query=Jiang%2C+Z">Ziyue Jiang</a>, <a href="/search/eess?searchtype=author&query=Zhou%2C+L">Long Zhou</a>, <a href="/search/eess?searchtype=author&query=Liu%2C+S">Shujie Liu</a>, <a href="/search/eess?searchtype=author&query=Cheng%2C+X">Xize Cheng</a>, <a href="/search/eess?searchtype=author&query=Yang%2C+X">Xiaoda Yang</a>, <a href="/search/eess?searchtype=author&query=Wang%2C+Z">Zehan Wang</a>, <a href="/search/eess?searchtype=author&query=Yang%2C+Q">Qian Yang</a>, <a href="/search/eess?searchtype=author&query=Li%2C+J">Jian Li</a>, <a href="/search/eess?searchtype=author&query=Jiang%2C+Y">Yidi Jiang</a>, <a href="/search/eess?searchtype=author&query=He%2C+J">Jingzhen He</a>, <a href="/search/eess?searchtype=author&query=Chu%2C+Y">Yunfei Chu</a>, <a href="/search/eess?searchtype=author&query=Xu%2C+J">Jin Xu</a>, <a href="/search/eess?searchtype=author&query=Zhao%2C+Z">Zhou Zhao</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2411.13577v1-abstract-short" style="display: inline;"> Recent advancements in spoken dialogue models, exemplified by systems like GPT-4o, have captured significant attention in the speech domain. Compared to traditional three-tier cascaded spoken dialogue models that comprise speech recognition (ASR), large language models (LLMs), and text-to-speech (TTS), modern spoken dialogue models exhibit greater intelligence. These advanced spoken dialogue model… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.13577v1-abstract-full').style.display = 'inline'; document.getElementById('2411.13577v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2411.13577v1-abstract-full" style="display: none;"> Recent advancements in spoken dialogue models, exemplified by systems like GPT-4o, have captured significant attention in the speech domain. Compared to traditional three-tier cascaded spoken dialogue models that comprise speech recognition (ASR), large language models (LLMs), and text-to-speech (TTS), modern spoken dialogue models exhibit greater intelligence. These advanced spoken dialogue models not only comprehend audio, music, and other speech-related features, but also capture stylistic and timbral characteristics in speech. Moreover, they generate high-quality, multi-turn speech responses with low latency, enabling real-time interaction through simultaneous listening and speaking capability. Despite the progress in spoken dialogue systems, there is a lack of comprehensive surveys that systematically organize and analyze these systems and the underlying technologies. To address this, we have first compiled existing spoken dialogue systems in the chronological order and categorized them into the cascaded and end-to-end paradigms. We then provide an in-depth overview of the core technologies in spoken dialogue models, covering aspects such as speech representation, training paradigm, streaming, duplex, and interaction capabilities. Each section discusses the limitations of these technologies and outlines considerations for future research. Additionally, we present a thorough review of relevant datasets, evaluation metrics, and benchmarks from the perspectives of training and evaluating spoken dialogue systems. We hope this survey will contribute to advancing both academic research and industrial applications in the field of spoken dialogue systems. The related material is available at https://github.com/jishengpeng/WavChat. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.13577v1-abstract-full').style.display = 'none'; document.getElementById('2411.13577v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 14 November, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> November 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">60 papes, working in progress</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2411.12146">arXiv:2411.12146</a> <span> [<a href="https://arxiv.org/pdf/2411.12146">pdf</a>, <a href="https://arxiv.org/format/2411.12146">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Image and Video Processing">eess.IV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> Self-supervised denoising of visual field data improves detection of glaucoma progression </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&query=Wu%2C+S">Sean Wu</a>, <a href="/search/eess?searchtype=author&query=Chen%2C+J+Y">Jun Yu Chen</a>, <a href="/search/eess?searchtype=author&query=Mohammadzadeh%2C+V">Vahid Mohammadzadeh</a>, <a href="/search/eess?searchtype=author&query=Besharati%2C+S">Sajad Besharati</a>, <a href="/search/eess?searchtype=author&query=Lee%2C+J">Jaewon Lee</a>, <a href="/search/eess?searchtype=author&query=Nouri-Mahdavi%2C+K">Kouros Nouri-Mahdavi</a>, <a href="/search/eess?searchtype=author&query=Caprioli%2C+J">Joseph Caprioli</a>, <a href="/search/eess?searchtype=author&query=Fei%2C+Z">Zhe Fei</a>, <a href="/search/eess?searchtype=author&query=Scalzo%2C+F">Fabien Scalzo</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2411.12146v1-abstract-short" style="display: inline;"> Perimetric measurements provide insight into a patient's peripheral vision and day-to-day functioning and are the main outcome measure for identifying progression of visual damage from glaucoma. However, visual field data can be noisy, exhibiting high variance, especially with increasing damage. In this study, we demonstrate the utility of self-supervised deep learning in denoising visual field da… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.12146v1-abstract-full').style.display = 'inline'; document.getElementById('2411.12146v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2411.12146v1-abstract-full" style="display: none;"> Perimetric measurements provide insight into a patient's peripheral vision and day-to-day functioning and are the main outcome measure for identifying progression of visual damage from glaucoma. However, visual field data can be noisy, exhibiting high variance, especially with increasing damage. In this study, we demonstrate the utility of self-supervised deep learning in denoising visual field data from over 4000 patients to enhance its signal-to-noise ratio and its ability to detect true glaucoma progression. We deployed both a variational autoencoder (VAE) and a masked autoencoder to determine which self-supervised model best smooths the visual field data while reconstructing salient features that are less noisy and more predictive of worsening disease. Our results indicate that including a categorical p-value at every visual field location improves the smoothing of visual field data. Masked autoencoders led to cleaner denoised data than previous methods, such as variational autoencoders. A 4.7% increase in detection of progressing eyes with pointwise linear regression (PLR) was observed. The masked and variational autoencoders' smoothed data predicted glaucoma progression 2.3 months earlier when p-values were included compared to when they were not. The faster prediction of time to progression (TTP) and the higher percentage progression detected support our hypothesis that masking out visual field elements during training while including p-values at each location would improve the task of detection of visual field progression. Our study has clinically relevant implications regarding masking when training neural networks to denoise visual field data, resulting in earlier and more accurate detection of glaucoma progression. This denoising model can be integrated into future models for visual field analysis to enhance detection of glaucoma progression. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.12146v1-abstract-full').style.display = 'none'; document.getElementById('2411.12146v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 18 November, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> November 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">10 pages</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2411.12104">arXiv:2411.12104</a> <span> [<a href="https://arxiv.org/pdf/2411.12104">pdf</a>, <a href="https://arxiv.org/format/2411.12104">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Systems and Control">eess.SY</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Signal Processing">eess.SP</span> </div> </div> <p class="title is-5 mathjax"> Is Locational Marginal Price All You Need for Locational Marginal Emission? </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&query=He%2C+X">Xuan He</a>, <a href="/search/eess?searchtype=author&query=Tsang%2C+D+H+K">Danny H. K. Tsang</a>, <a href="/search/eess?searchtype=author&query=Chen%2C+Y">Yize Chen</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2411.12104v1-abstract-short" style="display: inline;"> Growing concerns over climate change call for improved techniques for estimating and quantifying the greenhouse gas emissions associated with electricity generation and transmission. Among the emission metrics designated for power grids, locational marginal emission (LME) can provide system operators and electricity market participants with valuable information on the emissions associated with ele… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.12104v1-abstract-full').style.display = 'inline'; document.getElementById('2411.12104v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2411.12104v1-abstract-full" style="display: none;"> Growing concerns over climate change call for improved techniques for estimating and quantifying the greenhouse gas emissions associated with electricity generation and transmission. Among the emission metrics designated for power grids, locational marginal emission (LME) can provide system operators and electricity market participants with valuable information on the emissions associated with electricity usage at various locations in the power network. In this paper, by investigating the operating patterns and physical interpretations of marginal emissions and costs in the security-constrained economic dispatch (SCED) problem, we identify and draw the exact connection between locational marginal price (LMP) and LME. Such interpretation helps instantly derive LME given nodal demand vectors or LMP, and also reveals the interplay between network congestion and nodal emission pattern. Our proposed approach helps reduce the computation time of LME by an order of magnitude compared to analytical approaches, while it can also serve as a plug-and-play module accompanied by an off-the-shelf market clearing and LMP calculation process. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.12104v1-abstract-full').style.display = 'none'; document.getElementById('2411.12104v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 18 November, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> November 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">8 pages, 5 figures, in submission</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2411.11687">arXiv:2411.11687</a> <span> [<a href="https://arxiv.org/pdf/2411.11687">pdf</a>, <a href="https://arxiv.org/format/2411.11687">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Systems and Control">eess.SY</span> </div> </div> <p class="title is-5 mathjax"> Coevolution of Opinion Dynamics and Recommendation System: Modeling Analysis and Reinforcement Learning Based Manipulation </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&query=Chen%2C+Y">Yuhong Chen</a>, <a href="/search/eess?searchtype=author&query=Dai%2C+X">Xiaobing Dai</a>, <a href="/search/eess?searchtype=author&query=Buss%2C+M">Martin Buss</a>, <a href="/search/eess?searchtype=author&query=Liu%2C+F">Fangzhou Liu</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2411.11687v1-abstract-short" style="display: inline;"> In this work, we develop an analytical framework that integrates opinion dynamics with a recommendation system. By incorporating elements such as collaborative filtering, we provide a precise characterization of how recommendation systems shape interpersonal interactions and influence opinion formation. Moreover, the property of the coevolution of both opinion dynamics and recommendation systems i… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.11687v1-abstract-full').style.display = 'inline'; document.getElementById('2411.11687v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2411.11687v1-abstract-full" style="display: none;"> In this work, we develop an analytical framework that integrates opinion dynamics with a recommendation system. By incorporating elements such as collaborative filtering, we provide a precise characterization of how recommendation systems shape interpersonal interactions and influence opinion formation. Moreover, the property of the coevolution of both opinion dynamics and recommendation systems is also shown. Specifically, the convergence of this coevolutionary system is theoretically proved, and the mechanisms behind filter bubble formation are elucidated. Our analysis of the maximum number of opinion clusters shows how recommendation system parameters affect opinion grouping and polarization. Additionally, we incorporate the influence of propagators into our model and propose a reinforcement learning-based solution. The analysis and the propagation solution are demonstrated in simulation using the Yelp data set. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.11687v1-abstract-full').style.display = 'none'; document.getElementById('2411.11687v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 18 November, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> November 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2411.10570">arXiv:2411.10570</a> <span> [<a href="https://arxiv.org/pdf/2411.10570">pdf</a>, <a href="https://arxiv.org/format/2411.10570">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Image and Video Processing">eess.IV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> Normative Modeling for AD Diagnosis and Biomarker Identification </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&query=Zhao%2C+S">Songlin Zhao</a>, <a href="/search/eess?searchtype=author&query=Zhou%2C+R">Rong Zhou</a>, <a href="/search/eess?searchtype=author&query=Zhang%2C+Y">Yu Zhang</a>, <a href="/search/eess?searchtype=author&query=Chen%2C+Y">Yong Chen</a>, <a href="/search/eess?searchtype=author&query=He%2C+L">Lifang He</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2411.10570v1-abstract-short" style="display: inline;"> In this paper, we introduce a novel normative modeling approach that incorporates focal loss and adversarial autoencoders (FAAE) for Alzheimer's Disease (AD) diagnosis and biomarker identification. Our method is an end-to-end approach that embeds an adversarial focal loss discriminator within the autoencoder structure, specifically designed to effectively target and capture more complex and challe… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.10570v1-abstract-full').style.display = 'inline'; document.getElementById('2411.10570v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2411.10570v1-abstract-full" style="display: none;"> In this paper, we introduce a novel normative modeling approach that incorporates focal loss and adversarial autoencoders (FAAE) for Alzheimer's Disease (AD) diagnosis and biomarker identification. Our method is an end-to-end approach that embeds an adversarial focal loss discriminator within the autoencoder structure, specifically designed to effectively target and capture more complex and challenging cases. We first use the enhanced autoencoder to create a normative model based on data from healthy control (HC) individuals. We then apply this model to estimate total and regional neuroanatomical deviation in AD patients. Through extensive experiments on the OASIS-3 and ADNI datasets, our approach significantly outperforms previous state-of-the-art methods. This advancement not only streamlines the detection process but also provides a greater insight into the biomarker potential for AD. Our code can be found at \url{https://github.com/soz223/FAAE}. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.10570v1-abstract-full').style.display = 'none'; document.getElementById('2411.10570v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 15 November, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> November 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">10 pages, 3 figures</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2411.09220">arXiv:2411.09220</a> <span> [<a href="https://arxiv.org/pdf/2411.09220">pdf</a>, <a href="https://arxiv.org/format/2411.09220">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Audio and Speech Processing">eess.AS</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Signal Processing">eess.SP</span> </div> </div> <p class="title is-5 mathjax"> Transferable Adversarial Attacks against ASR </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&query=Gao%2C+X">Xiaoxue Gao</a>, <a href="/search/eess?searchtype=author&query=Li%2C+Z">Zexin Li</a>, <a href="/search/eess?searchtype=author&query=Chen%2C+Y">Yiming Chen</a>, <a href="/search/eess?searchtype=author&query=Liu%2C+C">Cong Liu</a>, <a href="/search/eess?searchtype=author&query=Li%2C+H">Haizhou Li</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2411.09220v1-abstract-short" style="display: inline;"> Given the extensive research and real-world applications of automatic speech recognition (ASR), ensuring the robustness of ASR models against minor input perturbations becomes a crucial consideration for maintaining their effectiveness in real-time scenarios. Previous explorations into ASR model robustness have predominantly revolved around evaluating accuracy on white-box settings with full acces… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.09220v1-abstract-full').style.display = 'inline'; document.getElementById('2411.09220v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2411.09220v1-abstract-full" style="display: none;"> Given the extensive research and real-world applications of automatic speech recognition (ASR), ensuring the robustness of ASR models against minor input perturbations becomes a crucial consideration for maintaining their effectiveness in real-time scenarios. Previous explorations into ASR model robustness have predominantly revolved around evaluating accuracy on white-box settings with full access to ASR models. Nevertheless, full ASR model details are often not available in real-world applications. Therefore, evaluating the robustness of black-box ASR models is essential for a comprehensive understanding of ASR model resilience. In this regard, we thoroughly study the vulnerability of practical black-box attacks in cutting-edge ASR models and propose to employ two advanced time-domain-based transferable attacks alongside our differentiable feature extractor. We also propose a speech-aware gradient optimization approach (SAGO) for ASR, which forces mistranscription with minimal impact on human imperceptibility through voice activity detection rule and a speech-aware gradient-oriented optimizer. Our comprehensive experimental results reveal performance enhancements compared to baseline approaches across five models on two databases. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.09220v1-abstract-full').style.display = 'none'; document.getElementById('2411.09220v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 14 November, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> November 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">IEEE SPL</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2411.06107">arXiv:2411.06107</a> <span> [<a href="https://arxiv.org/pdf/2411.06107">pdf</a>, <a href="https://arxiv.org/format/2411.06107">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Systems and Control">eess.SY</span> </div> </div> <p class="title is-5 mathjax"> A capacity renting framework for shared energy storage considering peer-to-peer energy trading of prosumers with privacy protection </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&query=Sun%2C+Y">Yingcong Sun</a>, <a href="/search/eess?searchtype=author&query=Chen%2C+L">Laijun Chen</a>, <a href="/search/eess?searchtype=author&query=Chen%2C+Y">Yue Chen</a>, <a href="/search/eess?searchtype=author&query=Tang%2C+M">Mingrui Tang</a>, <a href="/search/eess?searchtype=author&query=Mei%2C+S">Shengwei Mei</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2411.06107v1-abstract-short" style="display: inline;"> Shared energy storage systems (ESS) present a promising solution to the temporal imbalance between energy generation from renewable distributed generators (DGs) and the power demands of prosumers. However, as DG penetration rates rise, spatial energy imbalances become increasingly significant, necessitating the integration of peer-to-peer (P2P) energy trading within the shared ESS framework. Two k… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.06107v1-abstract-full').style.display = 'inline'; document.getElementById('2411.06107v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2411.06107v1-abstract-full" style="display: none;"> Shared energy storage systems (ESS) present a promising solution to the temporal imbalance between energy generation from renewable distributed generators (DGs) and the power demands of prosumers. However, as DG penetration rates rise, spatial energy imbalances become increasingly significant, necessitating the integration of peer-to-peer (P2P) energy trading within the shared ESS framework. Two key challenges emerge in this context: the absence of effective mechanisms and the greater difficulty for privacy protection due to increased data communication. This research proposes a capacity renting framework for shared ESS considering P2P energy trading of prosumers. In the proposed framework, prosumers can participate in P2P energy trading and rent capacities from shared ESS. A generalized Nash game is formulated to model the trading process and the competitive interactions among prosumers, and the variational equilibrium of the game is proved to be equivalent to the optimal solution of a quadratic programming (QP) problem. To address the privacy protection concern, the problem is solved using the alternating direction method of multipliers (ADMM) with the Paillier cryptosystem. Finally, numerical simulations demonstrate the impact of P2P energy trading on the shared ESS framework and validate the effectiveness of the proposed privacy-preserving algorithm. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.06107v1-abstract-full').style.display = 'none'; document.getElementById('2411.06107v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 9 November, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> November 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2411.05361">arXiv:2411.05361</a> <span> [<a href="https://arxiv.org/pdf/2411.05361">pdf</a>, <a href="https://arxiv.org/format/2411.05361">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Audio and Speech Processing">eess.AS</span> </div> </div> <p class="title is-5 mathjax"> Dynamic-SUPERB Phase-2: A Collaboratively Expanding Benchmark for Measuring the Capabilities of Spoken Language Models with 180 Tasks </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&query=Huang%2C+C">Chien-yu Huang</a>, <a href="/search/eess?searchtype=author&query=Chen%2C+W">Wei-Chih Chen</a>, <a href="/search/eess?searchtype=author&query=Yang%2C+S">Shu-wen Yang</a>, <a href="/search/eess?searchtype=author&query=Liu%2C+A+T">Andy T. Liu</a>, <a href="/search/eess?searchtype=author&query=Li%2C+C">Chen-An Li</a>, <a href="/search/eess?searchtype=author&query=Lin%2C+Y">Yu-Xiang Lin</a>, <a href="/search/eess?searchtype=author&query=Tseng%2C+W">Wei-Cheng Tseng</a>, <a href="/search/eess?searchtype=author&query=Diwan%2C+A">Anuj Diwan</a>, <a href="/search/eess?searchtype=author&query=Shih%2C+Y">Yi-Jen Shih</a>, <a href="/search/eess?searchtype=author&query=Shi%2C+J">Jiatong Shi</a>, <a href="/search/eess?searchtype=author&query=Chen%2C+W">William Chen</a>, <a href="/search/eess?searchtype=author&query=Chen%2C+X">Xuanjun Chen</a>, <a href="/search/eess?searchtype=author&query=Hsiao%2C+C">Chi-Yuan Hsiao</a>, <a href="/search/eess?searchtype=author&query=Peng%2C+P">Puyuan Peng</a>, <a href="/search/eess?searchtype=author&query=Wang%2C+S">Shih-Heng Wang</a>, <a href="/search/eess?searchtype=author&query=Kuan%2C+C">Chun-Yi Kuan</a>, <a href="/search/eess?searchtype=author&query=Lu%2C+K">Ke-Han Lu</a>, <a href="/search/eess?searchtype=author&query=Chang%2C+K">Kai-Wei Chang</a>, <a href="/search/eess?searchtype=author&query=Yang%2C+C">Chih-Kai Yang</a>, <a href="/search/eess?searchtype=author&query=Ritter-Gutierrez%2C+F">Fabian Ritter-Gutierrez</a>, <a href="/search/eess?searchtype=author&query=Chuang%2C+M+T">Ming To Chuang</a>, <a href="/search/eess?searchtype=author&query=Huang%2C+K">Kuan-Po Huang</a>, <a href="/search/eess?searchtype=author&query=Arora%2C+S">Siddhant Arora</a>, <a href="/search/eess?searchtype=author&query=Lin%2C+Y">You-Kuan Lin</a>, <a href="/search/eess?searchtype=author&query=Yeo%2C+E">Eunjung Yeo</a> , et al. (53 additional authors not shown) </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2411.05361v1-abstract-short" style="display: inline;"> Multimodal foundation models, such as Gemini and ChatGPT, have revolutionized human-machine interactions by seamlessly integrating various forms of data. Developing a universal spoken language model that comprehends a wide range of natural language instructions is critical for bridging communication gaps and facilitating more intuitive interactions. However, the absence of a comprehensive evaluati… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.05361v1-abstract-full').style.display = 'inline'; document.getElementById('2411.05361v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2411.05361v1-abstract-full" style="display: none;"> Multimodal foundation models, such as Gemini and ChatGPT, have revolutionized human-machine interactions by seamlessly integrating various forms of data. Developing a universal spoken language model that comprehends a wide range of natural language instructions is critical for bridging communication gaps and facilitating more intuitive interactions. However, the absence of a comprehensive evaluation benchmark poses a significant challenge. We present Dynamic-SUPERB Phase-2, an open and evolving benchmark for the comprehensive evaluation of instruction-based universal speech models. Building upon the first generation, this second version incorporates 125 new tasks contributed collaboratively by the global research community, expanding the benchmark to a total of 180 tasks, making it the largest benchmark for speech and audio evaluation. While the first generation of Dynamic-SUPERB was limited to classification tasks, Dynamic-SUPERB Phase-2 broadens its evaluation capabilities by introducing a wide array of novel and diverse tasks, including regression and sequence generation, across speech, music, and environmental audio. Evaluation results indicate that none of the models performed well universally. SALMONN-13B excelled in English ASR, while WavLLM demonstrated high accuracy in emotion recognition, but current models still require further innovations to handle a broader range of tasks. We will soon open-source all task data and the evaluation pipeline. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.05361v1-abstract-full').style.display = 'none'; document.getElementById('2411.05361v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 8 November, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> November 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2411.02447">arXiv:2411.02447</a> <span> [<a href="https://arxiv.org/pdf/2411.02447">pdf</a>, <a href="https://arxiv.org/format/2411.02447">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Quantum Physics">quant-ph</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Systems and Control">eess.SY</span> </div> </div> <p class="title is-5 mathjax"> qGDP: Quantum Legalization and Detailed Placement for Superconducting Quantum Computers </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&query=Zhang%2C+J">Junyao Zhang</a>, <a href="/search/eess?searchtype=author&query=Zhou%2C+G">Guanglei Zhou</a>, <a href="/search/eess?searchtype=author&query=Cheng%2C+F">Feng Cheng</a>, <a href="/search/eess?searchtype=author&query=Ku%2C+J">Jonathan Ku</a>, <a href="/search/eess?searchtype=author&query=Ding%2C+Q">Qi Ding</a>, <a href="/search/eess?searchtype=author&query=Gu%2C+J">Jiaqi Gu</a>, <a href="/search/eess?searchtype=author&query=Wang%2C+H">Hanrui Wang</a>, <a href="/search/eess?searchtype=author&query=Li%2C+H+%22">Hai "Helen" Li</a>, <a href="/search/eess?searchtype=author&query=Chen%2C+Y">Yiran Chen</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2411.02447v1-abstract-short" style="display: inline;"> Noisy Intermediate-Scale Quantum (NISQ) computers are currently limited by their qubit numbers, which hampers progress towards fault-tolerant quantum computing. A major challenge in scaling these systems is crosstalk, which arises from unwanted interactions among neighboring components such as qubits and resonators. An innovative placement strategy tailored for superconducting quantum computers ca… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.02447v1-abstract-full').style.display = 'inline'; document.getElementById('2411.02447v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2411.02447v1-abstract-full" style="display: none;"> Noisy Intermediate-Scale Quantum (NISQ) computers are currently limited by their qubit numbers, which hampers progress towards fault-tolerant quantum computing. A major challenge in scaling these systems is crosstalk, which arises from unwanted interactions among neighboring components such as qubits and resonators. An innovative placement strategy tailored for superconducting quantum computers can systematically address crosstalk within the constraints of limited substrate areas. Legalization is a crucial stage in placement process, refining post-global-placement configurations to satisfy design constraints and enhance layout quality. However, existing legalizers are not supported to legalize quantum placements. We aim to address this gap with qGDP, developed to meticulously legalize quantum components by adhering to quantum spatial constraints and reducing resonator crossing to alleviate various crosstalk effects. Our results indicate that qGDP effectively legalizes and fine-tunes the layout, addressing the quantum-specific spatial constraints inherent in various device topologies. By evaluating diverse NISQ benchmarks. qGDP consistently outperforms state-of-the-art legalization engines, delivering substantial improvements in fidelity and reducing spatial violation, with average gains of 34.4x and 16.9x, respectively. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.02447v1-abstract-full').style.display = 'none'; document.getElementById('2411.02447v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 2 November, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> November 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2411.01859">arXiv:2411.01859</a> <span> [<a href="https://arxiv.org/pdf/2411.01859">pdf</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Image and Video Processing">eess.IV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> A Novel Deep Learning Tractography Fiber Clustering Framework for Functionally Consistent White Matter Parcellation Using Multimodal Diffusion MRI and Functional MRI </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&query=Wang%2C+J">Jin Wang</a>, <a href="/search/eess?searchtype=author&query=Guo%2C+B">Bocheng Guo</a>, <a href="/search/eess?searchtype=author&query=Li%2C+Y">Yijie Li</a>, <a href="/search/eess?searchtype=author&query=Wang%2C+J">Junyi Wang</a>, <a href="/search/eess?searchtype=author&query=Chen%2C+Y">Yuqian Chen</a>, <a href="/search/eess?searchtype=author&query=Rushmore%2C+J">Jarrett Rushmore</a>, <a href="/search/eess?searchtype=author&query=Makris%2C+N">Nikos Makris</a>, <a href="/search/eess?searchtype=author&query=Rathi%2C+Y">Yogesh Rathi</a>, <a href="/search/eess?searchtype=author&query=O%27Donnell%2C+L+J">Lauren J O'Donnell</a>, <a href="/search/eess?searchtype=author&query=Zhang%2C+F">Fan Zhang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2411.01859v1-abstract-short" style="display: inline;"> Tractography fiber clustering using diffusion MRI (dMRI) is a crucial strategy for white matter (WM) parcellation. Current methods primarily use the geometric information of fibers (i.e., the spatial trajectories) to group similar fibers into clusters, overlooking the important functional signals present along the fiber tracts. There is increasing evidence that neural activity in the WM can be mea… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.01859v1-abstract-full').style.display = 'inline'; document.getElementById('2411.01859v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2411.01859v1-abstract-full" style="display: none;"> Tractography fiber clustering using diffusion MRI (dMRI) is a crucial strategy for white matter (WM) parcellation. Current methods primarily use the geometric information of fibers (i.e., the spatial trajectories) to group similar fibers into clusters, overlooking the important functional signals present along the fiber tracts. There is increasing evidence that neural activity in the WM can be measured using functional MRI (fMRI), offering potentially valuable multimodal information for fiber clustering. In this paper, we develop a novel deep learning fiber clustering framework, namely Deep Multi-view Fiber Clustering (DMVFC), that uses joint dMRI and fMRI data to enable functionally consistent WM parcellation. DMVFC can effectively integrate the geometric characteristics of the WM fibers with the fMRI BOLD signals along the fiber tracts. It includes two major components: 1) a multi-view pretraining module to compute embedding features from fiber geometric information and functional signals separately, and 2) a collaborative fine-tuning module to simultaneously refine the two kinds of embeddings. In the experiments, we compare DMVFC with two state-of-the-art fiber clustering methods and demonstrate superior performance in achieving functionally meaningful and consistent WM parcellation results. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.01859v1-abstract-full').style.display = 'none'; document.getElementById('2411.01859v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 4 November, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> November 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">5 pages, 3 figures</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2411.00335">arXiv:2411.00335</a> <span> [<a href="https://arxiv.org/pdf/2411.00335">pdf</a>, <a href="https://arxiv.org/format/2411.00335">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Neural and Evolutionary Computing">cs.NE</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Image and Video Processing">eess.IV</span> </div> </div> <p class="title is-5 mathjax"> NCST: Neural-based Color Style Transfer for Video Retouching </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&query=Jiang%2C+X">Xintao Jiang</a>, <a href="/search/eess?searchtype=author&query=Chen%2C+Y">Yaosen Chen</a>, <a href="/search/eess?searchtype=author&query=Zhang%2C+S">Siqin Zhang</a>, <a href="/search/eess?searchtype=author&query=Wang%2C+W">Wei Wang</a>, <a href="/search/eess?searchtype=author&query=Wen%2C+X">Xuming Wen</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2411.00335v1-abstract-short" style="display: inline;"> Video color style transfer aims to transform the color style of an original video by using a reference style image. Most existing methods employ neural networks, which come with challenges like opaque transfer processes and limited user control over the outcomes. Typically, users cannot fine-tune the resulting images or videos. To tackle this issue, we introduce a method that predicts specific par… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.00335v1-abstract-full').style.display = 'inline'; document.getElementById('2411.00335v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2411.00335v1-abstract-full" style="display: none;"> Video color style transfer aims to transform the color style of an original video by using a reference style image. Most existing methods employ neural networks, which come with challenges like opaque transfer processes and limited user control over the outcomes. Typically, users cannot fine-tune the resulting images or videos. To tackle this issue, we introduce a method that predicts specific parameters for color style transfer using two images. Initially, we train a neural network to learn the corresponding color adjustment parameters. When applying style transfer to a video, we fine-tune the network with key frames from the video and the chosen style image, generating precise transformation parameters. These are then applied to convert the color style of both images and videos. Our experimental results demonstrate that our algorithm surpasses current methods in color style transfer quality. Moreover, each parameter in our method has a specific, interpretable meaning, enabling users to understand the color style transfer process and allowing them to perform manual fine-tuning if desired. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.00335v1-abstract-full').style.display = 'none'; document.getElementById('2411.00335v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 31 October, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> November 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">10 pages, 8 figures</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2410.22674">arXiv:2410.22674</a> <span> [<a href="https://arxiv.org/pdf/2410.22674">pdf</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Image and Video Processing">eess.IV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> Dynamic PET Image Prediction Using a Network Combining Reversible and Irreversible Modules </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&query=Sun%2C+J">Jie Sun</a>, <a href="/search/eess?searchtype=author&query=Xia%2C+Q">Qian Xia</a>, <a href="/search/eess?searchtype=author&query=Sun%2C+C">Chuanfu Sun</a>, <a href="/search/eess?searchtype=author&query=Chen%2C+Y">Yumei Chen</a>, <a href="/search/eess?searchtype=author&query=Liu%2C+H">Huafeng Liu</a>, <a href="/search/eess?searchtype=author&query=Zhu%2C+W">Wentao Zhu</a>, <a href="/search/eess?searchtype=author&query=Liu%2C+Q">Qiegen Liu</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2410.22674v1-abstract-short" style="display: inline;"> Dynamic positron emission tomography (PET) images can reveal the distribution of tracers in the organism and the dynamic processes involved in biochemical reactions, and it is widely used in clinical practice. Despite the high effectiveness of dynamic PET imaging in studying the kinetics and metabolic processes of radiotracers. Pro-longed scan times can cause discomfort for both patients and medic… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.22674v1-abstract-full').style.display = 'inline'; document.getElementById('2410.22674v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2410.22674v1-abstract-full" style="display: none;"> Dynamic positron emission tomography (PET) images can reveal the distribution of tracers in the organism and the dynamic processes involved in biochemical reactions, and it is widely used in clinical practice. Despite the high effectiveness of dynamic PET imaging in studying the kinetics and metabolic processes of radiotracers. Pro-longed scan times can cause discomfort for both patients and medical personnel. This study proposes a dynamic frame prediction method for dynamic PET imaging, reduc-ing dynamic PET scanning time by applying a multi-module deep learning framework composed of reversible and irreversible modules. The network can predict kinetic parameter images based on the early frames of dynamic PET images, and then generate complete dynamic PET images. In validation experiments with simulated data, our network demonstrated good predictive performance for kinetic parameters and was able to reconstruct high-quality dynamic PET images. Additionally, in clinical data experiments, the network exhibited good generalization performance and attached that the proposed method has promising clinical application prospects. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.22674v1-abstract-full').style.display = 'none'; document.getElementById('2410.22674v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 29 October, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2410.22646">arXiv:2410.22646</a> <span> [<a href="https://arxiv.org/pdf/2410.22646">pdf</a>, <a href="https://arxiv.org/format/2410.22646">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Signal Processing">eess.SP</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> SleepNetZero: Zero-Burden Zero-Shot Reliable Sleep Staging With Neural Networks Based on Ballistocardiograms </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&query=Li%2C+S">Shuzhen Li</a>, <a href="/search/eess?searchtype=author&query=Chen%2C+Y">Yuxin Chen</a>, <a href="/search/eess?searchtype=author&query=Chen%2C+X">Xuesong Chen</a>, <a href="/search/eess?searchtype=author&query=Gao%2C+R">Ruiyang Gao</a>, <a href="/search/eess?searchtype=author&query=Zhang%2C+Y">Yupeng Zhang</a>, <a href="/search/eess?searchtype=author&query=Yu%2C+C">Chao Yu</a>, <a href="/search/eess?searchtype=author&query=Li%2C+Y">Yunfei Li</a>, <a href="/search/eess?searchtype=author&query=Ye%2C+Z">Ziyi Ye</a>, <a href="/search/eess?searchtype=author&query=Huang%2C+W">Weijun Huang</a>, <a href="/search/eess?searchtype=author&query=Yi%2C+H">Hongliang Yi</a>, <a href="/search/eess?searchtype=author&query=Leng%2C+Y">Yue Leng</a>, <a href="/search/eess?searchtype=author&query=Wu%2C+Y">Yi Wu</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2410.22646v1-abstract-short" style="display: inline;"> Sleep monitoring plays a crucial role in maintaining good health, with sleep staging serving as an essential metric in the monitoring process. Traditional methods, utilizing medical sensors like EEG and ECG, can be effective but often present challenges such as unnatural user experience, complex deployment, and high costs. Ballistocardiography~(BCG), a type of piezoelectric sensor signal, offers a… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.22646v1-abstract-full').style.display = 'inline'; document.getElementById('2410.22646v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2410.22646v1-abstract-full" style="display: none;"> Sleep monitoring plays a crucial role in maintaining good health, with sleep staging serving as an essential metric in the monitoring process. Traditional methods, utilizing medical sensors like EEG and ECG, can be effective but often present challenges such as unnatural user experience, complex deployment, and high costs. Ballistocardiography~(BCG), a type of piezoelectric sensor signal, offers a non-invasive, user-friendly, and easily deployable alternative for long-term home monitoring. However, reliable BCG-based sleep staging is challenging due to the limited sleep monitoring data available for BCG. A restricted training dataset prevents the model from generalization across populations. Additionally, transferring to BCG faces difficulty ensuring model robustness when migrating from other data sources. To address these issues, we introduce SleepNetZero, a zero-shot learning based approach for sleep staging. To tackle the generalization challenge, we propose a series of BCG feature extraction methods that align BCG components with corresponding respiratory, cardiac, and movement channels in PSG. This allows models to be trained on large-scale PSG datasets that are diverse in population. For the migration challenge, we employ data augmentation techniques, significantly enhancing generalizability. We conducted extensive training and testing on large datasets~(12393 records from 9637 different subjects), achieving an accuracy of 0.803 and a Cohen's Kappa of 0.718. ZeroSleepNet was also deployed in real prototype~(monitoring pads) and tested in actual hospital settings~(265 users), demonstrating an accuracy of 0.697 and a Cohen's Kappa of 0.589. To the best of our knowledge, this work represents the first known reliable BCG-based sleep staging effort and marks a significant step towards in-home health monitoring. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.22646v1-abstract-full').style.display = 'none'; document.getElementById('2410.22646v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 29 October, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">25 pages</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2410.22078">arXiv:2410.22078</a> <span> [<a href="https://arxiv.org/pdf/2410.22078">pdf</a>, <a href="https://arxiv.org/format/2410.22078">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Image and Video Processing">eess.IV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> DINeuro: Distilling Knowledge from 2D Natural Images via Deformable Tubular Transferring Strategy for 3D Neuron Reconstruction </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&query=Cheng%2C+Y+S">Yik San Cheng</a>, <a href="/search/eess?searchtype=author&query=Zhao%2C+R">Runkai Zhao</a>, <a href="/search/eess?searchtype=author&query=Wang%2C+H">Heng Wang</a>, <a href="/search/eess?searchtype=author&query=Peng%2C+H">Hanchuan Peng</a>, <a href="/search/eess?searchtype=author&query=Lo%2C+Y">Yui Lo</a>, <a href="/search/eess?searchtype=author&query=Chen%2C+Y">Yuqian Chen</a>, <a href="/search/eess?searchtype=author&query=O%27Donnell%2C+L+J">Lauren J. O'Donnell</a>, <a href="/search/eess?searchtype=author&query=Cai%2C+W">Weidong Cai</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2410.22078v1-abstract-short" style="display: inline;"> Reconstructing neuron morphology from 3D light microscope imaging data is critical to aid neuroscientists in analyzing brain networks and neuroanatomy. With the boost from deep learning techniques, a variety of learning-based segmentation models have been developed to enhance the signal-to-noise ratio of raw neuron images as a pre-processing step in the reconstruction workflow. However, most exist… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.22078v1-abstract-full').style.display = 'inline'; document.getElementById('2410.22078v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2410.22078v1-abstract-full" style="display: none;"> Reconstructing neuron morphology from 3D light microscope imaging data is critical to aid neuroscientists in analyzing brain networks and neuroanatomy. With the boost from deep learning techniques, a variety of learning-based segmentation models have been developed to enhance the signal-to-noise ratio of raw neuron images as a pre-processing step in the reconstruction workflow. However, most existing models directly encode the latent representative features of volumetric neuron data but neglect their intrinsic morphological knowledge. To address this limitation, we design a novel framework that distills the prior knowledge from a 2D Vision Transformer pre-trained on extensive 2D natural images to facilitate neuronal morphological learning of our 3D Vision Transformer. To bridge the knowledge gap between the 2D natural image and 3D microscopic morphologic domains, we propose a deformable tubular transferring strategy that adapts the pre-trained 2D natural knowledge to the inherent tubular characteristics of neuronal structure in the latent embedding space. The experimental results on the Janelia dataset of the BigNeuron project demonstrate that our method achieves a segmentation performance improvement of 4.53% in mean Dice and 3.56% in mean 95% Hausdorff distance. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.22078v1-abstract-full').style.display = 'none'; document.getElementById('2410.22078v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 29 October, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">9 pages, 3 figures, and 2 tables. This work has been submitted to the IEEE for possible publication</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2410.21320">arXiv:2410.21320</a> <span> [<a href="https://arxiv.org/pdf/2410.21320">pdf</a>, <a href="https://arxiv.org/ps/2410.21320">ps</a>, <a href="https://arxiv.org/format/2410.21320">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Signal Processing">eess.SP</span> </div> </div> <p class="title is-5 mathjax"> The D-Subspace Algorithm for Online Learning over Distributed Networks </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&query=Chen%2C+Y">Yitong Chen</a>, <a href="/search/eess?searchtype=author&query=Jin%2C+D">Danqi Jin</a>, <a href="/search/eess?searchtype=author&query=Chen%2C+J">Jie Chen</a>, <a href="/search/eess?searchtype=author&query=Richard%2C+C">Cedric Richard</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2410.21320v2-abstract-short" style="display: inline;"> This material introduces the D-Subspace algorithm derived on the basis of the centralized algorithm [1], which originally addresses parameter estimation problems under a subspace constraint. </span> <span class="abstract-full has-text-grey-dark mathjax" id="2410.21320v2-abstract-full" style="display: none;"> This material introduces the D-Subspace algorithm derived on the basis of the centralized algorithm [1], which originally addresses parameter estimation problems under a subspace constraint. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.21320v2-abstract-full').style.display = 'none'; document.getElementById('2410.21320v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 10 November, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 26 October, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2410.19880">arXiv:2410.19880</a> <span> [<a href="https://arxiv.org/pdf/2410.19880">pdf</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Systems and Control">eess.SY</span> </div> </div> <p class="title is-5 mathjax"> Implementing Deep Reinforcement Learning-Based Grid Voltage Control in Real-World Power Systems: Challenges and Insights </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&query=Shi%2C+D">Di Shi</a>, <a href="/search/eess?searchtype=author&query=Zhang%2C+Q">Qiang Zhang</a>, <a href="/search/eess?searchtype=author&query=Hong%2C+M">Mingguo Hong</a>, <a href="/search/eess?searchtype=author&query=Wang%2C+F">Fengyu Wang</a>, <a href="/search/eess?searchtype=author&query=Maslennikov%2C+S">Slava Maslennikov</a>, <a href="/search/eess?searchtype=author&query=Luo%2C+X">Xiaochuan Luo</a>, <a href="/search/eess?searchtype=author&query=Chen%2C+Y">Yize Chen</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2410.19880v1-abstract-short" style="display: inline;"> Deep reinforcement learning (DRL) holds significant promise for managing voltage control challenges in simulated power grid environments. However, its real-world application in power system operations remains underexplored. This study rigorously evaluates DRL's performance and limitations within actual operational contexts by utilizing detailed experiments across the IEEE 14-bus system, Illinois 2… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.19880v1-abstract-full').style.display = 'inline'; document.getElementById('2410.19880v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2410.19880v1-abstract-full" style="display: none;"> Deep reinforcement learning (DRL) holds significant promise for managing voltage control challenges in simulated power grid environments. However, its real-world application in power system operations remains underexplored. This study rigorously evaluates DRL's performance and limitations within actual operational contexts by utilizing detailed experiments across the IEEE 14-bus system, Illinois 200-bus system, and the ISO New England node-breaker model. Our analysis critically assesses DRL's effectiveness for grid control from a system operator's perspective, identifying specific performance bottlenecks. The findings provide actionable insights that highlight the necessity of advancing AI technologies to effectively address the growing complexities of modern power systems. This research underscores the vital role of DRL in enhancing grid management and reliability. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.19880v1-abstract-full').style.display = 'none'; document.getElementById('2410.19880v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 24 October, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">5 pages, 9 figures</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2410.19283">arXiv:2410.19283</a> <span> [<a href="https://arxiv.org/pdf/2410.19283">pdf</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Image and Video Processing">eess.IV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> ST-NeRP: Spatial-Temporal Neural Representation Learning with Prior Embedding for Patient-specific Imaging Study </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&query=Qiu%2C+L">Liang Qiu</a>, <a href="/search/eess?searchtype=author&query=Shen%2C+L">Liyue Shen</a>, <a href="/search/eess?searchtype=author&query=Liu%2C+L">Lianli Liu</a>, <a href="/search/eess?searchtype=author&query=Liu%2C+J">Junyan Liu</a>, <a href="/search/eess?searchtype=author&query=Chen%2C+Y">Yizheng Chen</a>, <a href="/search/eess?searchtype=author&query=Xing%2C+L">Lei Xing</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2410.19283v1-abstract-short" style="display: inline;"> During and after a course of therapy, imaging is routinely used to monitor the disease progression and assess the treatment responses. Despite of its significance, reliably capturing and predicting the spatial-temporal anatomic changes from a sequence of patient-specific image series presents a considerable challenge. Thus, the development of a computational framework becomes highly desirable for… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.19283v1-abstract-full').style.display = 'inline'; document.getElementById('2410.19283v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2410.19283v1-abstract-full" style="display: none;"> During and after a course of therapy, imaging is routinely used to monitor the disease progression and assess the treatment responses. Despite of its significance, reliably capturing and predicting the spatial-temporal anatomic changes from a sequence of patient-specific image series presents a considerable challenge. Thus, the development of a computational framework becomes highly desirable for a multitude of practical applications. In this context, we propose a strategy of Spatial-Temporal Neural Representation learning with Prior embedding (ST-NeRP) for patient-specific imaging study. Our strategy involves leveraging an Implicit Neural Representation (INR) network to encode the image at the reference time point into a prior embedding. Subsequently, a spatial-temporally continuous deformation function is learned through another INR network. This network is trained using the whole patient-specific image sequence, enabling the prediction of deformation fields at various target time points. The efficacy of the ST-NeRP model is demonstrated through its application to diverse sequential image series, including 4D CT and longitudinal CT datasets within thoracic and abdominal imaging. The proposed ST-NeRP model exhibits substantial potential in enabling the monitoring of anatomical changes within a patient throughout the therapeutic journey. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.19283v1-abstract-full').style.display = 'none'; document.getElementById('2410.19283v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 24 October, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">14 pages with 10 figures and 6 tables</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2410.18784">arXiv:2410.18784</a> <span> [<a href="https://arxiv.org/pdf/2410.18784">pdf</a>, <a href="https://arxiv.org/ps/2410.18784">ps</a>, <a href="https://arxiv.org/format/2410.18784">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Signal Processing">eess.SP</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Numerical Analysis">math.NA</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Statistics Theory">math.ST</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">stat.ML</span> </div> </div> <p class="title is-5 mathjax"> Denoising diffusion probabilistic models are optimally adaptive to unknown low dimensionality </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&query=Huang%2C+Z">Zhihan Huang</a>, <a href="/search/eess?searchtype=author&query=Wei%2C+Y">Yuting Wei</a>, <a href="/search/eess?searchtype=author&query=Chen%2C+Y">Yuxin Chen</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2410.18784v2-abstract-short" style="display: inline;"> The denoising diffusion probabilistic model (DDPM) has emerged as a mainstream generative model in generative AI. While sharp convergence guarantees have been established for the DDPM, the iteration complexity is, in general, proportional to the ambient data dimension, resulting in overly conservative theory that fails to explain its practical efficiency. This has motivated the recent work Li and… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.18784v2-abstract-full').style.display = 'inline'; document.getElementById('2410.18784v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2410.18784v2-abstract-full" style="display: none;"> The denoising diffusion probabilistic model (DDPM) has emerged as a mainstream generative model in generative AI. While sharp convergence guarantees have been established for the DDPM, the iteration complexity is, in general, proportional to the ambient data dimension, resulting in overly conservative theory that fails to explain its practical efficiency. This has motivated the recent work Li and Yan (2024a) to investigate how the DDPM can achieve sampling speed-ups through automatic exploitation of intrinsic low dimensionality of data. We strengthen this line of work by demonstrating, in some sense, optimal adaptivity to unknown low dimensionality. For a broad class of data distributions with intrinsic dimension $k$, we prove that the iteration complexity of the DDPM scales nearly linearly with $k$, which is optimal when using KL divergence to measure distributional discrepancy. Notably, our work is closely aligned with the independent concurrent work Potaptchik et al. (2024) -- posted two weeks prior to ours -- in establishing nearly linear-$k$ convergence guarantees for the DDPM. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.18784v2-abstract-full').style.display = 'none'; document.getElementById('2410.18784v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 29 October, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 24 October, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2410.18382">arXiv:2410.18382</a> <span> [<a href="https://arxiv.org/pdf/2410.18382">pdf</a>, <a href="https://arxiv.org/format/2410.18382">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Systems and Control">eess.SY</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Signal Processing">eess.SP</span> </div> </div> <p class="title is-5 mathjax"> Sensing-Communication-Computing-Control Closed-Loop Optimization for 6G Unmanned Robotic Systems </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&query=Fang%2C+X">Xinran Fang</a>, <a href="/search/eess?searchtype=author&query=Lei%2C+C">Chengleyang Lei</a>, <a href="/search/eess?searchtype=author&query=Feng%2C+W">Wei Feng</a>, <a href="/search/eess?searchtype=author&query=Chen%2C+Y">Yunfei Chen</a>, <a href="/search/eess?searchtype=author&query=Xiao%2C+M">Ming Xiao</a>, <a href="/search/eess?searchtype=author&query=Ge%2C+N">Ning Ge</a>, <a href="/search/eess?searchtype=author&query=Wang%2C+C">Chengxiang Wang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2410.18382v1-abstract-short" style="display: inline;"> Rapid advancements in field robots have brought a new kind of cyber physical system (CPS)--unmanned robotic system--under the spotlight. In the upcoming sixth-generation (6G) era, these systems hold great potential to replace humans in hazardous tasks. This paper investigates an unmanned robotic system comprising a multi-functional unmanned aerial vehicle (UAV), sensors, and actuators. The UAV car… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.18382v1-abstract-full').style.display = 'inline'; document.getElementById('2410.18382v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2410.18382v1-abstract-full" style="display: none;"> Rapid advancements in field robots have brought a new kind of cyber physical system (CPS)--unmanned robotic system--under the spotlight. In the upcoming sixth-generation (6G) era, these systems hold great potential to replace humans in hazardous tasks. This paper investigates an unmanned robotic system comprising a multi-functional unmanned aerial vehicle (UAV), sensors, and actuators. The UAV carries communication and computing modules, acting as an edge information hub (EIH) that transfers and processes information. During the task execution, the EIH gathers sensing data, calculates control commands, and transmits commands to actuators--leading to reflex-arc-like sensing-communication-computing-control ($\mathbf{SC}^3$) loops. Unlike existing studies that design $\mathbf{SC}^3$ loop components separately, we take each $\mathbf{SC}^3$ loop as an integrated structure and propose a goal-oriented closed-loop optimization scheme. This scheme jointly optimizes uplink and downlink (UL&DL) communication and computing within and across the $\mathbf{SC}^3$ loops to minimize the total linear quadratic regulator (LQR) cost. We derive optimal closed-form solutions for intra-loop allocation and propose an efficient iterative algorithm for inter-loop optimization. Under the condition of adequate CPU frequency availability, we derive an approximate closed-form solution for inter-loop bandwidth allocation. Simulation results demonstrate that the proposed scheme achieves a two-tier task-level balance within and across $\mathbf{SC}^3$ loops. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.18382v1-abstract-full').style.display = 'none'; document.getElementById('2410.18382v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 23 October, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2410.18370">arXiv:2410.18370</a> <span> [<a href="https://arxiv.org/pdf/2410.18370">pdf</a>, <a href="https://arxiv.org/format/2410.18370">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Systems and Control">eess.SY</span> </div> </div> <p class="title is-5 mathjax"> Structured Connectivity for 6G Reflex Arc: Task-Oriented Virtual User and New Uplink-Downlink Tradeoff </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&query=Fang%2C+X">Xinran Fang</a>, <a href="/search/eess?searchtype=author&query=Lei%2C+C">Chengleyang Lei</a>, <a href="/search/eess?searchtype=author&query=Feng%2C+W">Wei Feng</a>, <a href="/search/eess?searchtype=author&query=Chen%2C+Y">Yunfei Chen</a>, <a href="/search/eess?searchtype=author&query=Ge%2C+N">Ning Ge</a>, <a href="/search/eess?searchtype=author&query=Jin%2C+S">Shi Jin</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2410.18370v1-abstract-short" style="display: inline;"> To accommodate the evolving demands of unmanned operations, the future sixth-generation (6G) network will support not only communication links but also sensing-communication-computing-control ($\mathbf{SC}^3$) loops. In each $\mathbf{SC}^3$ cycle, the sensor uploads sensing data to the computing center, and the computing center calculates the control command and sends it to the actuator to take ac… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.18370v1-abstract-full').style.display = 'inline'; document.getElementById('2410.18370v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2410.18370v1-abstract-full" style="display: none;"> To accommodate the evolving demands of unmanned operations, the future sixth-generation (6G) network will support not only communication links but also sensing-communication-computing-control ($\mathbf{SC}^3$) loops. In each $\mathbf{SC}^3$ cycle, the sensor uploads sensing data to the computing center, and the computing center calculates the control command and sends it to the actuator to take action. To maintain the task-level connections between the sensor-computing center link and the computing center-actuator link, we propose to treat the sensor and actuator as a virtual user. In this way, the two communication links of the $\mathbf{SC}^3$ loop become the uplink and downlink (UL&DL) of the virtual user. Based on the virtual user, we propose a task-oriented UL&DL optimization scheme. This scheme jointly optimizes UL&DL transmit power, time, bandwidth, and CPU frequency to minimize the control linear quadratic regulator (LQR) cost. We decouple the complex problem into a convex UL&DL bandwidth allocation problem with the closed-form solution for the optimal time allocation. Simulation results demonstrate that the proposed scheme achieves a task-level balance between the UL&DL, surpassing conventional communication schemes that optimize each link separately. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.18370v1-abstract-full').style.display = 'none'; document.getElementById('2410.18370v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 23 October, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2410.18091">arXiv:2410.18091</a> <span> [<a href="https://arxiv.org/pdf/2410.18091">pdf</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Signal Processing">eess.SP</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> Predicting Fine-grained Behavioral and Psychological Symptoms of Dementia Based on Machine Learning and Smart Wearable Devices </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&query=Hsu%2C+B+W">Benny Wei-Yun Hsu</a>, <a href="/search/eess?searchtype=author&query=Chen%2C+Y">Yu-Ming Chen</a>, <a href="/search/eess?searchtype=author&query=Yang%2C+Y">Yuan-Han Yang</a>, <a href="/search/eess?searchtype=author&query=Tseng%2C+V+S">Vincent S. Tseng</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2410.18091v1-abstract-short" style="display: inline;"> Behavioral and Psychological Symptoms of Dementia (BPSD) impact dementia care substantially, affecting both patients and caregivers. Effective management and early detection of BPSD are crucial to reduce the stress and burden on caregivers and healthcare systems. Despite the advancements in machine learning for dementia prediction, there is a considerable gap in utilizing these methods for BPSD pr… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.18091v1-abstract-full').style.display = 'inline'; document.getElementById('2410.18091v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2410.18091v1-abstract-full" style="display: none;"> Behavioral and Psychological Symptoms of Dementia (BPSD) impact dementia care substantially, affecting both patients and caregivers. Effective management and early detection of BPSD are crucial to reduce the stress and burden on caregivers and healthcare systems. Despite the advancements in machine learning for dementia prediction, there is a considerable gap in utilizing these methods for BPSD prediction. This study aims to fill this gap by presenting a novel personalized framework for BPSD prediction, utilizing physiological signals from smart wearable devices. Our personalized fine-grained BPSD prediction method accurately predicts BPSD occurrences by extracting individual behavioral patterns, while the generalized models identify diverse patterns and differentiate between various BPSD symptoms. Detailed comparisons between the proposed personalized method and conventional generalized methods reveals substantial improvements across all performance metrics, including a 16.0% increase in AUC. These results demonstrate the potential of our proposed method in advancing dementia care by enabling proactive interventions and improving patient outcomes in real-world scenarios. To the best of our knowledge, this is the first study that leverages physiological signals from smart wearable devices to predict BPSD, marking a significant stride in dementia care research. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.18091v1-abstract-full').style.display = 'none'; document.getElementById('2410.18091v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 8 October, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2410.17196">arXiv:2410.17196</a> <span> [<a href="https://arxiv.org/pdf/2410.17196">pdf</a>, <a href="https://arxiv.org/format/2410.17196">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Sound">cs.SD</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Audio and Speech Processing">eess.AS</span> </div> </div> <p class="title is-5 mathjax"> VoiceBench: Benchmarking LLM-Based Voice Assistants </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&query=Chen%2C+Y">Yiming Chen</a>, <a href="/search/eess?searchtype=author&query=Yue%2C+X">Xianghu Yue</a>, <a href="/search/eess?searchtype=author&query=Zhang%2C+C">Chen Zhang</a>, <a href="/search/eess?searchtype=author&query=Gao%2C+X">Xiaoxue Gao</a>, <a href="/search/eess?searchtype=author&query=Tan%2C+R+T">Robby T. Tan</a>, <a href="/search/eess?searchtype=author&query=Li%2C+H">Haizhou Li</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2410.17196v1-abstract-short" style="display: inline;"> Building on the success of large language models (LLMs), recent advancements such as GPT-4o have enabled real-time speech interactions through LLM-based voice assistants, offering a significantly improved user experience compared to traditional text-based interactions. However, the absence of benchmarks designed to evaluate these speech interaction capabilities has hindered progress of LLM-based v… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.17196v1-abstract-full').style.display = 'inline'; document.getElementById('2410.17196v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2410.17196v1-abstract-full" style="display: none;"> Building on the success of large language models (LLMs), recent advancements such as GPT-4o have enabled real-time speech interactions through LLM-based voice assistants, offering a significantly improved user experience compared to traditional text-based interactions. However, the absence of benchmarks designed to evaluate these speech interaction capabilities has hindered progress of LLM-based voice assistants development. Current evaluations focus primarily on automatic speech recognition (ASR) or general knowledge evaluation with clean speeches, neglecting the more intricate, real-world scenarios that involve diverse speaker characteristics, environmental and content factors. To address this, we introduce VoiceBench, the first benchmark designed to provide a multi-faceted evaluation of LLM-based voice assistants. VoiceBench also includes both real and synthetic spoken instructions that incorporate the above three key real-world variations. Extensive experiments reveal the limitations of current LLM-based voice assistant models and offer valuable insights for future research and development in this field. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.17196v1-abstract-full').style.display = 'none'; document.getElementById('2410.17196v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 22 October, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Work in progress. Data is available at https://github.com/MatthewCYM/VoiceBench</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2410.15108">arXiv:2410.15108</a> <span> [<a href="https://arxiv.org/pdf/2410.15108">pdf</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Neurons and Cognition">q-bio.NC</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Image and Video Processing">eess.IV</span> </div> </div> <p class="title is-5 mathjax"> The shape of the brain's connections is predictive of cognitive performance: an explainable machine learning study </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&query=Lo%2C+Y">Yui Lo</a>, <a href="/search/eess?searchtype=author&query=Chen%2C+Y">Yuqian Chen</a>, <a href="/search/eess?searchtype=author&query=Liu%2C+D">Dongnan Liu</a>, <a href="/search/eess?searchtype=author&query=Liu%2C+W">Wan Liu</a>, <a href="/search/eess?searchtype=author&query=Zekelman%2C+L">Leo Zekelman</a>, <a href="/search/eess?searchtype=author&query=Rushmore%2C+J">Jarrett Rushmore</a>, <a href="/search/eess?searchtype=author&query=Zhang%2C+F">Fan Zhang</a>, <a href="/search/eess?searchtype=author&query=Rathi%2C+Y">Yogesh Rathi</a>, <a href="/search/eess?searchtype=author&query=Makris%2C+N">Nikos Makris</a>, <a href="/search/eess?searchtype=author&query=Golby%2C+A+J">Alexandra J. Golby</a>, <a href="/search/eess?searchtype=author&query=Cai%2C+W">Weidong Cai</a>, <a href="/search/eess?searchtype=author&query=O%27Donnell%2C+L+J">Lauren J. O'Donnell</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2410.15108v1-abstract-short" style="display: inline;"> The shape of the brain's white matter connections is relatively unexplored in diffusion MRI tractography analysis. While it is known that tract shape varies in populations and across the human lifespan, it is unknown if the variability in dMRI tractography-derived shape may relate to the brain's functional variability across individuals. This work explores the potential of leveraging tractography… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.15108v1-abstract-full').style.display = 'inline'; document.getElementById('2410.15108v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2410.15108v1-abstract-full" style="display: none;"> The shape of the brain's white matter connections is relatively unexplored in diffusion MRI tractography analysis. While it is known that tract shape varies in populations and across the human lifespan, it is unknown if the variability in dMRI tractography-derived shape may relate to the brain's functional variability across individuals. This work explores the potential of leveraging tractography fiber cluster shape measures to predict subject-specific cognitive performance. We implement machine learning models to predict individual cognitive performance scores. We study a large-scale database from the HCP-YA study. We apply an atlas-based fiber cluster parcellation to the dMRI tractography of each individual. We compute 15 shape, microstructure, and connectivity features for each fiber cluster. Using these features as input, we train a total of 210 models to predict 7 different NIH Toolbox cognitive performance assessments. We apply an explainable AI technique, SHAP, to assess the importance of each fiber cluster for prediction. Our results demonstrate that shape measures are predictive of individual cognitive performance. The studied shape measures, such as irregularity, diameter, total surface area, volume, and branch volume, are as effective for prediction as microstructure and connectivity measures. The overall best-performing feature is a shape feature, irregularity, which describes how different a cluster's shape is from an idealized cylinder. Further interpretation using SHAP values suggest that fiber clusters with features highly predictive of cognitive ability are widespread throughout the brain, including fiber clusters from the superficial association, deep association, cerebellar, striatal, and projection pathways. This study demonstrates the strong potential of shape descriptors to enhance the study of the brain's white matter and its relationship to cognitive function. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.15108v1-abstract-full').style.display = 'none'; document.getElementById('2410.15108v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 19 October, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2410.14214">arXiv:2410.14214</a> <span> [<a href="https://arxiv.org/pdf/2410.14214">pdf</a>, <a href="https://arxiv.org/format/2410.14214">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Image and Video Processing">eess.IV</span> </div> </div> <p class="title is-5 mathjax"> MambaSCI: Efficient Mamba-UNet for Quad-Bayer Patterned Video Snapshot Compressive Imaging </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&query=Pan%2C+Z">Zhenghao Pan</a>, <a href="/search/eess?searchtype=author&query=Zeng%2C+H">Haijin Zeng</a>, <a href="/search/eess?searchtype=author&query=Cao%2C+J">Jiezhang Cao</a>, <a href="/search/eess?searchtype=author&query=Chen%2C+Y">Yongyong Chen</a>, <a href="/search/eess?searchtype=author&query=Zhang%2C+K">Kai Zhang</a>, <a href="/search/eess?searchtype=author&query=Xu%2C+Y">Yong Xu</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2410.14214v1-abstract-short" style="display: inline;"> Color video snapshot compressive imaging (SCI) employs computational imaging techniques to capture multiple sequential video frames in a single Bayer-patterned measurement. With the increasing popularity of quad-Bayer pattern in mainstream smartphone cameras for capturing high-resolution videos, mobile photography has become more accessible to a wider audience. However, existing color video SCI re… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.14214v1-abstract-full').style.display = 'inline'; document.getElementById('2410.14214v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2410.14214v1-abstract-full" style="display: none;"> Color video snapshot compressive imaging (SCI) employs computational imaging techniques to capture multiple sequential video frames in a single Bayer-patterned measurement. With the increasing popularity of quad-Bayer pattern in mainstream smartphone cameras for capturing high-resolution videos, mobile photography has become more accessible to a wider audience. However, existing color video SCI reconstruction algorithms are designed based on the traditional Bayer pattern. When applied to videos captured by quad-Bayer cameras, these algorithms often result in color distortion and ineffective demosaicing, rendering them impractical for primary equipment. To address this challenge, we propose the MambaSCI method, which leverages the Mamba and UNet architectures for efficient reconstruction of quad-Bayer patterned color video SCI. To the best of our knowledge, our work presents the first algorithm for quad-Bayer patterned SCI reconstruction, and also the initial application of the Mamba model to this task. Specifically, we customize Residual-Mamba-Blocks, which residually connect the Spatial-Temporal Mamba (STMamba), Edge-Detail-Reconstruction (EDR) module, and Channel Attention (CA) module. Respectively, STMamba is used to model long-range spatial-temporal dependencies with linear complexity, EDR is for better edge-detail reconstruction, and CA is used to compensate for the missing channel information interaction in Mamba model. Experiments demonstrate that MambaSCI surpasses state-of-the-art methods with lower computational and memory costs. PyTorch style pseudo-code for the core modules is provided in the supplementary materials. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.14214v1-abstract-full').style.display = 'none'; document.getElementById('2410.14214v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 18 October, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">NeurIPS 2024</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2410.13222">arXiv:2410.13222</a> <span> [<a href="https://arxiv.org/pdf/2410.13222">pdf</a>, <a href="https://arxiv.org/format/2410.13222">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Optimization and Control">math.OC</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Systems and Control">eess.SY</span> </div> </div> <p class="title is-5 mathjax"> Optimal Covariance Steering of Linear Stochastic Systems with Hybrid Transitions </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&query=Yu%2C+H">Hongzhe Yu</a>, <a href="/search/eess?searchtype=author&query=Franco%2C+D+F">Diana Frias Franco</a>, <a href="/search/eess?searchtype=author&query=Johnson%2C+A+M">Aaron M. Johnson</a>, <a href="/search/eess?searchtype=author&query=Chen%2C+Y">Yongxin Chen</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2410.13222v1-abstract-short" style="display: inline;"> This work addresses the problem of optimally steering the state covariance of a linear stochastic system from an initial to a target, subject to hybrid transitions. The nonlinear and discontinuous jump dynamics complicate the control design for hybrid systems. Under uncertainties, stochastic jump timing and state variations further intensify this challenge. This work aims to regulate the hybrid sy… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.13222v1-abstract-full').style.display = 'inline'; document.getElementById('2410.13222v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2410.13222v1-abstract-full" style="display: none;"> This work addresses the problem of optimally steering the state covariance of a linear stochastic system from an initial to a target, subject to hybrid transitions. The nonlinear and discontinuous jump dynamics complicate the control design for hybrid systems. Under uncertainties, stochastic jump timing and state variations further intensify this challenge. This work aims to regulate the hybrid system's state trajectory to stay close to a nominal deterministic one, despite uncertainties and noises. We address this problem by directly controlling state covariances around a mean trajectory, and this problem is termed the Hybrid Covariance Steering (H-CS) problem. The jump dynamics are approximated to the first order by leveraging the Saltation Matrix. When the jump dynamics are nonsingular, we derive an analytical closed-form solution to the H-CS problem. For general jump dynamics with possible singularity and changes in the state dimensions, we reformulate the problem into a convex optimization over path distributions by leveraging Schrodinger's Bridge duality to the smooth covariance control problem. The covariance propagation at hybrid events is enforced as equality constraints to handle singularity issues. The proposed convex framework scales linearly with the number of jump events, ensuring efficient, optimal solutions. This work thus provides a computationally efficient solution to the general H-CS problem. Numerical experiments are conducted to validate the proposed method. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.13222v1-abstract-full').style.display = 'none'; document.getElementById('2410.13222v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 17 October, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">14 pages</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2410.13067">arXiv:2410.13067</a> <span> [<a href="https://arxiv.org/pdf/2410.13067">pdf</a>, <a href="https://arxiv.org/format/2410.13067">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Systems and Control">eess.SY</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Optimization and Control">math.OC</span> </div> </div> <p class="title is-5 mathjax"> Two-Timescale Linear Stochastic Approximation: Constant Stepsizes Go a Long Way </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&query=Kwon%2C+J">Jeongyeol Kwon</a>, <a href="/search/eess?searchtype=author&query=Dotson%2C+L">Luke Dotson</a>, <a href="/search/eess?searchtype=author&query=Chen%2C+Y">Yudong Chen</a>, <a href="/search/eess?searchtype=author&query=Xie%2C+Q">Qiaomin Xie</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2410.13067v1-abstract-short" style="display: inline;"> Previous studies on two-timescale stochastic approximation (SA) mainly focused on bounding mean-squared errors under diminishing stepsize schemes. In this work, we investigate {\it constant} stpesize schemes through the lens of Markov processes, proving that the iterates of both timescales converge to a unique joint stationary distribution in Wasserstein metric. We derive explicit geometric and no… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.13067v1-abstract-full').style.display = 'inline'; document.getElementById('2410.13067v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2410.13067v1-abstract-full" style="display: none;"> Previous studies on two-timescale stochastic approximation (SA) mainly focused on bounding mean-squared errors under diminishing stepsize schemes. In this work, we investigate {\it constant} stpesize schemes through the lens of Markov processes, proving that the iterates of both timescales converge to a unique joint stationary distribution in Wasserstein metric. We derive explicit geometric and non-asymptotic convergence rates, as well as the variance and bias introduced by constant stepsizes in the presence of Markovian noise. Specifically, with two constant stepsizes $伪< 尾$, we show that the biases scale linearly with both stepsizes as $螛(伪)+螛(尾)$ up to higher-order terms, while the variance of the slower iterate (resp., faster iterate) scales only with its own stepsize as $O(伪)$ (resp., $O(尾)$). Unlike previous work, our results require no additional assumptions such as $尾^2 \ll 伪$ nor extra dependence on dimensions. These fine-grained characterizations allow tail-averaging and extrapolation techniques to reduce variance and bias, improving mean-squared error bound to $O(尾^4 + \frac{1}{t})$ for both iterates. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.13067v1-abstract-full').style.display = 'none'; document.getElementById('2410.13067v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 16 October, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2410.11148">arXiv:2410.11148</a> <span> [<a href="https://arxiv.org/pdf/2410.11148">pdf</a>, <a href="https://arxiv.org/format/2410.11148">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Image and Video Processing">eess.IV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> Deep unrolled primal dual network for TOF-PET list-mode image reconstruction </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&query=Hu%2C+R">Rui Hu</a>, <a href="/search/eess?searchtype=author&query=Li%2C+C">Chenxu Li</a>, <a href="/search/eess?searchtype=author&query=Tian%2C+K">Kun Tian</a>, <a href="/search/eess?searchtype=author&query=Cui%2C+J">Jianan Cui</a>, <a href="/search/eess?searchtype=author&query=Chen%2C+Y">Yunmei Chen</a>, <a href="/search/eess?searchtype=author&query=Liu%2C+H">Huafeng Liu</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2410.11148v1-abstract-short" style="display: inline;"> Time-of-flight (TOF) information provides more accurate location data for annihilation photons, thereby enhancing the quality of PET reconstruction images and reducing noise. List-mode reconstruction has a significant advantage in handling TOF information. However, current advanced TOF PET list-mode reconstruction algorithms still require improvements when dealing with low-count data. Deep learnin… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.11148v1-abstract-full').style.display = 'inline'; document.getElementById('2410.11148v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2410.11148v1-abstract-full" style="display: none;"> Time-of-flight (TOF) information provides more accurate location data for annihilation photons, thereby enhancing the quality of PET reconstruction images and reducing noise. List-mode reconstruction has a significant advantage in handling TOF information. However, current advanced TOF PET list-mode reconstruction algorithms still require improvements when dealing with low-count data. Deep learning algorithms have shown promising results in PET image reconstruction. Nevertheless, the incorporation of TOF information poses significant challenges related to the storage space required by deep learning methods, particularly for the advanced deep unrolled methods. In this study, we propose a deep unrolled primal dual network for TOF-PET list-mode reconstruction. The network is unrolled into multiple phases, with each phase comprising a dual network for list-mode domain updates and a primal network for image domain updates. We utilize CUDA for parallel acceleration and computation of the system matrix for TOF list-mode data, and we adopt a dynamic access strategy to mitigate memory consumption. Reconstructed images of different TOF resolutions and different count levels show that the proposed method outperforms the LM-OSEM, LM-EMTV, LM-SPDHG,LM-SPDHG-TV and FastPET method in both visually and quantitative analysis. These results demonstrate the potential application of deep unrolled methods for TOF-PET list-mode data and show better performance than current mainstream TOF-PET list-mode reconstruction algorithms, providing new insights for the application of deep learning methods in TOF list-mode data. The codes for this work are available at https://github.com/RickHH/LMPDnet <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.11148v1-abstract-full').style.display = 'none'; document.getElementById('2410.11148v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 14 October, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">11 pages, 11 figures</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2410.09358">arXiv:2410.09358</a> <span> [<a href="https://arxiv.org/pdf/2410.09358">pdf</a>, <a href="https://arxiv.org/format/2410.09358">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Signal Processing">eess.SP</span> </div> </div> <p class="title is-5 mathjax"> Exploiting Moving Arrays for Near-Field Sensing </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&query=Chen%2C+Y">Yilong Chen</a>, <a href="/search/eess?searchtype=author&query=Ren%2C+Z">Zixiang Ren</a>, <a href="/search/eess?searchtype=author&query=Yu%2C+X">Xianghao Yu</a>, <a href="/search/eess?searchtype=author&query=Liu%2C+L">Lei Liu</a>, <a href="/search/eess?searchtype=author&query=Xu%2C+J">Jie Xu</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2410.09358v1-abstract-short" style="display: inline;"> This letter exploits moving arrays to enable nearfield multiple-input multiple-output (MIMO) sensing via a limited number of antenna elements. We consider a scenario where a base station (BS) is equipped with a uniform linear array (ULA) on a moving platform. The objective is to locate a point target in the two-dimensional (2D) space by leveraging the near-field channel characteristics created by… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.09358v1-abstract-full').style.display = 'inline'; document.getElementById('2410.09358v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2410.09358v1-abstract-full" style="display: none;"> This letter exploits moving arrays to enable nearfield multiple-input multiple-output (MIMO) sensing via a limited number of antenna elements. We consider a scenario where a base station (BS) is equipped with a uniform linear array (ULA) on a moving platform. The objective is to locate a point target in the two-dimensional (2D) space by leveraging the near-field channel characteristics created by the movement of antenna arrays. Under this setup, we analyze the Cramer-Rao bound (CRB) for estimating the target's 2D coordinate, which provides the fundamental sensing performance limits for localization. It is revealed that our proposed design with a moving array achieves a CRB that is proportional to the CRB obtained by an equivalent extremely large ULA matching the platform's size. This shows that the movement of antenna array significantly enlarges its effective aperture to enable near-field sensing. Numerical results show that the proposed moving array design substantially enhances the target estimation performance compared to the conventional fixed array benchmark. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.09358v1-abstract-full').style.display = 'none'; document.getElementById('2410.09358v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 11 October, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">5 pages, 7 figures</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2410.09250">arXiv:2410.09250</a> <span> [<a href="https://arxiv.org/pdf/2410.09250">pdf</a>, <a href="https://arxiv.org/format/2410.09250">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Sound">cs.SD</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Audio and Speech Processing">eess.AS</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Quantum Physics">quant-ph</span> </div> </div> <p class="title is-5 mathjax"> Quantum-Trained Convolutional Neural Network for Deepfake Audio Detection </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&query=Lin%2C+C+A">Chu-Hsuan Abraham Lin</a>, <a href="/search/eess?searchtype=author&query=Liu%2C+C">Chen-Yu Liu</a>, <a href="/search/eess?searchtype=author&query=Chen%2C+S+Y">Samuel Yen-Chi Chen</a>, <a href="/search/eess?searchtype=author&query=Chen%2C+K">Kuan-Cheng Chen</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2410.09250v1-abstract-short" style="display: inline;"> The rise of deepfake technologies has posed significant challenges to privacy, security, and information integrity, particularly in audio and multimedia content. This paper introduces a Quantum-Trained Convolutional Neural Network (QT-CNN) framework designed to enhance the detection of deepfake audio, leveraging the computational power of quantum machine learning (QML). The QT-CNN employs a hybrid… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.09250v1-abstract-full').style.display = 'inline'; document.getElementById('2410.09250v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2410.09250v1-abstract-full" style="display: none;"> The rise of deepfake technologies has posed significant challenges to privacy, security, and information integrity, particularly in audio and multimedia content. This paper introduces a Quantum-Trained Convolutional Neural Network (QT-CNN) framework designed to enhance the detection of deepfake audio, leveraging the computational power of quantum machine learning (QML). The QT-CNN employs a hybrid quantum-classical approach, integrating Quantum Neural Networks (QNNs) with classical neural architectures to optimize training efficiency while reducing the number of trainable parameters. Our method incorporates a novel quantum-to-classical parameter mapping that effectively utilizes quantum states to enhance the expressive power of the model, achieving up to 70% parameter reduction compared to classical models without compromising accuracy. Data pre-processing involved extracting essential audio features, label encoding, feature scaling, and constructing sequential datasets for robust model evaluation. Experimental results demonstrate that the QT-CNN achieves comparable performance to traditional CNNs, maintaining high accuracy during training and testing phases across varying configurations of QNN blocks. The QT framework's ability to reduce computational overhead while maintaining performance underscores its potential for real-world applications in deepfake detection and other resource-constrained scenarios. This work highlights the practical benefits of integrating quantum computing into artificial intelligence, offering a scalable and efficient approach to advancing deepfake detection technologies. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.09250v1-abstract-full').style.display = 'none'; document.getElementById('2410.09250v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 11 October, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2410.08534">arXiv:2410.08534</a> <span> [<a href="https://arxiv.org/pdf/2410.08534">pdf</a>, <a href="https://arxiv.org/format/2410.08534">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Image and Video Processing">eess.IV</span> </div> </div> <p class="title is-5 mathjax"> Quality Prediction of AI Generated Images and Videos: Emerging Trends and Opportunities </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&query=Ghildyal%2C+A">Abhijay Ghildyal</a>, <a href="/search/eess?searchtype=author&query=Chen%2C+Y">Yuanhan Chen</a>, <a href="/search/eess?searchtype=author&query=Zadtootaghaj%2C+S">Saman Zadtootaghaj</a>, <a href="/search/eess?searchtype=author&query=Barman%2C+N">Nabajeet Barman</a>, <a href="/search/eess?searchtype=author&query=Bovik%2C+A+C">Alan C. Bovik</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2410.08534v2-abstract-short" style="display: inline;"> The advent of AI has influenced many aspects of human life, from self-driving cars and intelligent chatbots to text-based image and video generation models capable of creating realistic images and videos based on user prompts (text-to-image, image-to-image, and image-to-video). AI-based methods for image and video super resolution, video frame interpolation, denoising, and compression have already… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.08534v2-abstract-full').style.display = 'inline'; document.getElementById('2410.08534v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2410.08534v2-abstract-full" style="display: none;"> The advent of AI has influenced many aspects of human life, from self-driving cars and intelligent chatbots to text-based image and video generation models capable of creating realistic images and videos based on user prompts (text-to-image, image-to-image, and image-to-video). AI-based methods for image and video super resolution, video frame interpolation, denoising, and compression have already gathered significant attention and interest in the industry and some solutions are already being implemented in real-world products and services. However, to achieve widespread integration and acceptance, AI-generated and enhanced content must be visually accurate, adhere to intended use, and maintain high visual quality to avoid degrading the end user's quality of experience (QoE). One way to monitor and control the visual "quality" of AI-generated and -enhanced content is by deploying Image Quality Assessment (IQA) and Video Quality Assessment (VQA) models. However, most existing IQA and VQA models measure visual fidelity in terms of "reconstruction" quality against a pristine reference content and were not designed to assess the quality of "generative" artifacts. To address this, newer metrics and models have recently been proposed, but their performance evaluation and overall efficacy have been limited by datasets that were too small or otherwise lack representative content and/or distortion capacity; and by performance measures that can accurately report the success of an IQA/VQA model for "GenAI". This paper examines the current shortcomings and possibilities presented by AI-generated and enhanced image and video content, with a particular focus on end-user perceived quality. Finally, we discuss open questions and make recommendations for future work on the "GenAI" quality assessment problems, towards further progressing on this interesting and relevant field of research. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.08534v2-abstract-full').style.display = 'none'; document.getElementById('2410.08534v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 19 October, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 11 October, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">"The abstract field cannot be longer than 1,920 characters", the abstract appearing here is slightly shorter than that in the PDF file</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2410.08508">arXiv:2410.08508</a> <span> [<a href="https://arxiv.org/pdf/2410.08508">pdf</a>, <a href="https://arxiv.org/format/2410.08508">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Systems and Control">eess.SY</span> </div> </div> <p class="title is-5 mathjax"> Accelerated Distributed Stochastic Non-Convex Optimization over Time-Varying Directed Networks </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&query=Chen%2C+Y">Yiyue Chen</a>, <a href="/search/eess?searchtype=author&query=Hashemi%2C+A">Abolfazl Hashemi</a>, <a href="/search/eess?searchtype=author&query=Vikalo%2C+H">Haris Vikalo</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2410.08508v1-abstract-short" style="display: inline;"> Distributed stochastic non-convex optimization problems have recently received attention due to the growing interest of signal processing, computer vision, and natural language processing communities in applications deployed over distributed learning systems (e.g., federated learning). We study the setting where the data is distributed across the nodes of a time-varying directed network, a topolog… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.08508v1-abstract-full').style.display = 'inline'; document.getElementById('2410.08508v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2410.08508v1-abstract-full" style="display: none;"> Distributed stochastic non-convex optimization problems have recently received attention due to the growing interest of signal processing, computer vision, and natural language processing communities in applications deployed over distributed learning systems (e.g., federated learning). We study the setting where the data is distributed across the nodes of a time-varying directed network, a topology suitable for modeling dynamic networks experiencing communication delays and straggler effects. The network nodes, which can access only their local objectives and query a stochastic first-order oracle to obtain gradient estimates, collaborate to minimize a global objective function by exchanging messages with their neighbors. We propose an algorithm, novel to this setting, that leverages stochastic gradient descent with momentum and gradient tracking to solve distributed non-convex optimization problems over time-varying networks. To analyze the algorithm, we tackle the challenges that arise when analyzing dynamic network systems which communicate gradient acceleration components. We prove that the algorithm's oracle complexity is $\mathcal{O}(1/蔚^{1.5})$, and that under Polyak-$艁$ojasiewicz condition the algorithm converges linearly to a steady error state. The proposed scheme is tested on several learning tasks: a non-convex logistic regression experiment on the MNIST dataset, an image classification task on the CIFAR-10 dataset, and an NLP classification test on the IMDB dataset. We further present numerical simulations with an objective that satisfies the PL condition. The results demonstrate superior performance of the proposed framework compared to the existing related methods. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.08508v1-abstract-full').style.display = 'none'; document.getElementById('2410.08508v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 11 October, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">This work has been accepted at IEEE Transactions on Automatic Control</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2410.08329">arXiv:2410.08329</a> <span> [<a href="https://arxiv.org/pdf/2410.08329">pdf</a>, <a href="https://arxiv.org/format/2410.08329">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Signal Processing">eess.SP</span> </div> </div> <p class="title is-5 mathjax"> Physics and Deep Learning in Computational Wave Imaging </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&query=Lin%2C+Y">Youzuo Lin</a>, <a href="/search/eess?searchtype=author&query=Feng%2C+S">Shihang Feng</a>, <a href="/search/eess?searchtype=author&query=Theiler%2C+J">James Theiler</a>, <a href="/search/eess?searchtype=author&query=Chen%2C+Y">Yinpeng Chen</a>, <a href="/search/eess?searchtype=author&query=Villa%2C+U">Umberto Villa</a>, <a href="/search/eess?searchtype=author&query=Rao%2C+J">Jing Rao</a>, <a href="/search/eess?searchtype=author&query=Greenhall%2C+J">John Greenhall</a>, <a href="/search/eess?searchtype=author&query=Pantea%2C+C">Cristian Pantea</a>, <a href="/search/eess?searchtype=author&query=Anastasio%2C+M+A">Mark A. Anastasio</a>, <a href="/search/eess?searchtype=author&query=Wohlberg%2C+B">Brendt Wohlberg</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2410.08329v1-abstract-short" style="display: inline;"> Computational wave imaging (CWI) extracts hidden structure and physical properties of a volume of material by analyzing wave signals that traverse that volume. Applications include seismic exploration of the Earth's subsurface, acoustic imaging and non-destructive testing in material science, and ultrasound computed tomography in medicine. Current approaches for solving CWI problems can be divided… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.08329v1-abstract-full').style.display = 'inline'; document.getElementById('2410.08329v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2410.08329v1-abstract-full" style="display: none;"> Computational wave imaging (CWI) extracts hidden structure and physical properties of a volume of material by analyzing wave signals that traverse that volume. Applications include seismic exploration of the Earth's subsurface, acoustic imaging and non-destructive testing in material science, and ultrasound computed tomography in medicine. Current approaches for solving CWI problems can be divided into two categories: those rooted in traditional physics, and those based on deep learning. Physics-based methods stand out for their ability to provide high-resolution and quantitatively accurate estimates of acoustic properties within the medium. However, they can be computationally intensive and are susceptible to ill-posedness and nonconvexity typical of CWI problems. Machine learning-based computational methods have recently emerged, offering a different perspective to address these challenges. Diverse scientific communities have independently pursued the integration of deep learning in CWI. This review delves into how contemporary scientific machine-learning (ML) techniques, and deep neural networks in particular, have been harnessed to tackle CWI problems. We present a structured framework that consolidates existing research spanning multiple domains, including computational imaging, wave physics, and data science. This study concludes with important lessons learned from existing ML-based methods and identifies technical hurdles and emerging trends through a systematic analysis of the extensive literature on this topic. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.08329v1-abstract-full').style.display = 'none'; document.getElementById('2410.08329v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 10 October, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">29 pages, 11 figures</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2410.06998">arXiv:2410.06998</a> <span> [<a href="https://arxiv.org/pdf/2410.06998">pdf</a>, <a href="https://arxiv.org/format/2410.06998">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Systems and Control">eess.SY</span> </div> </div> <p class="title is-5 mathjax"> An Improved ESO-Based Line-of-Sight Guidance Law for Path Following of Underactuated Autonomous Underwater Helicopter With Nonlinear Tracking Differentiator and Anti-saturation Controller </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&query=Li%2C+H">Haoda Li</a>, <a href="/search/eess?searchtype=author&query=Liu%2C+Z">Zichen Liu</a>, <a href="/search/eess?searchtype=author&query=Huang%2C+J">Jin Huang</a>, <a href="/search/eess?searchtype=author&query=An%2C+X">Xinyu An</a>, <a href="/search/eess?searchtype=author&query=Chen%2C+Y">Ying Chen</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2410.06998v1-abstract-short" style="display: inline;"> This paper presents an Improved Extended-state-observer based Line-of-Sight (IELOS) guidance law for path following of underactuated Autonomous Underwater helicopter (AUH) utilizing a nonlinear tracking differentiator and anti-saturation controller. Due to the high mobility of the AUH, the classical reduced-order Extended-State-Observer (ESO) struggles to accurately track the sideslip angle, espec… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.06998v1-abstract-full').style.display = 'inline'; document.getElementById('2410.06998v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2410.06998v1-abstract-full" style="display: none;"> This paper presents an Improved Extended-state-observer based Line-of-Sight (IELOS) guidance law for path following of underactuated Autonomous Underwater helicopter (AUH) utilizing a nonlinear tracking differentiator and anti-saturation controller. Due to the high mobility of the AUH, the classical reduced-order Extended-State-Observer (ESO) struggles to accurately track the sideslip angle, especially when rapid variation occurs. By incorporating the nonlinear tracking differentiator and anti-saturation controller, the IELOS guidance law can precisely track sideslip angle and mitigate propeller thrust buffet compared to the classical Extended-state-observer based Line-of-Sight (ELOS) guidance law. The performance of ESO is significantly influenced by the bandwidth, with the Improved Extended-State-Observer (IESO) proving effective at low bandwidths where the classical ESO falls short. The paper establishes the input-to-state stability of the closed-loop system. Subsequently, simulation and pool experimental results are showcased to validate the effectiveness of the IELOS guidance law, which outperforms both the Line-of-Sight (LOS) and Adaptive Line-of-Sight (ALOS) guidance laws in terms of performance. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.06998v1-abstract-full').style.display = 'none'; document.getElementById('2410.06998v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 9 October, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2410.06997">arXiv:2410.06997</a> <span> [<a href="https://arxiv.org/pdf/2410.06997">pdf</a>, <a href="https://arxiv.org/format/2410.06997">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Image and Video Processing">eess.IV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> A Diffusion-based Xray2MRI Model: Generating Pseudo-MRI Volumes From one Single X-ray </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&query=Wang%2C+Z">Zhe Wang</a>, <a href="/search/eess?searchtype=author&query=Jennane%2C+R">Rachid Jennane</a>, <a href="/search/eess?searchtype=author&query=Chetouani%2C+A">Aladine Chetouani</a>, <a href="/search/eess?searchtype=author&query=Chen%2C+Y+H">Yung Hsin Chen</a>, <a href="/search/eess?searchtype=author&query=Bauer%2C+F">Fabian Bauer</a>, <a href="/search/eess?searchtype=author&query=Jarraya%2C+M">Mohamed Jarraya</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2410.06997v2-abstract-short" style="display: inline;"> Knee osteoarthritis (KOA) is a prevalent musculoskeletal disorder, and X-rays are commonly used for its diagnosis due to their cost-effectiveness. Magnetic Resonance Imaging (MRI), on the other hand, offers detailed soft tissue visualization and has become a valuable supplementary diagnostic tool for KOA. Unfortunately, the high cost and limited accessibility of MRI hinders its widespread use, lea… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.06997v2-abstract-full').style.display = 'inline'; document.getElementById('2410.06997v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2410.06997v2-abstract-full" style="display: none;"> Knee osteoarthritis (KOA) is a prevalent musculoskeletal disorder, and X-rays are commonly used for its diagnosis due to their cost-effectiveness. Magnetic Resonance Imaging (MRI), on the other hand, offers detailed soft tissue visualization and has become a valuable supplementary diagnostic tool for KOA. Unfortunately, the high cost and limited accessibility of MRI hinders its widespread use, leaving many patients with KOA to rely solely on X-ray imaging. In this study, we introduce a novel diffusion-based Xray2MRI model capable of generating pseudo-MRI volumes from a single X-ray image. In addition to using X-rays as conditional input, our model integrates target depth, KOA probability distribution, and image intensity distribution modules to guide the synthesis process, ensuring that the generated corresponding slices accurately correspond to the anatomical structures. Experimental results demonstrate that by integrating information from X-rays with additional input data, our proposed approach is capable of generating pseudo-MRI sequences that approximate real MRI scans. In addition, by increasing the number of inference steps, the model achieves effective interpolation, which further improves the continuity and smoothness of the generated MRI sequences, representing a promising first attempt at cost-effective medical imaging solutions. This study is available on https://zwang78.github.io/. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.06997v2-abstract-full').style.display = 'none'; document.getElementById('2410.06997v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 16 October, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 9 October, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2410.06885">arXiv:2410.06885</a> <span> [<a href="https://arxiv.org/pdf/2410.06885">pdf</a>, <a href="https://arxiv.org/ps/2410.06885">ps</a>, <a href="https://arxiv.org/format/2410.06885">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Audio and Speech Processing">eess.AS</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Sound">cs.SD</span> </div> </div> <p class="title is-5 mathjax"> F5-TTS: A Fairytaler that Fakes Fluent and Faithful Speech with Flow Matching </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&query=Chen%2C+Y">Yushen Chen</a>, <a href="/search/eess?searchtype=author&query=Niu%2C+Z">Zhikang Niu</a>, <a href="/search/eess?searchtype=author&query=Ma%2C+Z">Ziyang Ma</a>, <a href="/search/eess?searchtype=author&query=Deng%2C+K">Keqi Deng</a>, <a href="/search/eess?searchtype=author&query=Wang%2C+C">Chunhui Wang</a>, <a href="/search/eess?searchtype=author&query=Zhao%2C+J">Jian Zhao</a>, <a href="/search/eess?searchtype=author&query=Yu%2C+K">Kai Yu</a>, <a href="/search/eess?searchtype=author&query=Chen%2C+X">Xie Chen</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2410.06885v2-abstract-short" style="display: inline;"> This paper introduces F5-TTS, a fully non-autoregressive text-to-speech system based on flow matching with Diffusion Transformer (DiT). Without requiring complex designs such as duration model, text encoder, and phoneme alignment, the text input is simply padded with filler tokens to the same length as input speech, and then the denoising is performed for speech generation, which was originally pr… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.06885v2-abstract-full').style.display = 'inline'; document.getElementById('2410.06885v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2410.06885v2-abstract-full" style="display: none;"> This paper introduces F5-TTS, a fully non-autoregressive text-to-speech system based on flow matching with Diffusion Transformer (DiT). Without requiring complex designs such as duration model, text encoder, and phoneme alignment, the text input is simply padded with filler tokens to the same length as input speech, and then the denoising is performed for speech generation, which was originally proved feasible by E2 TTS. However, the original design of E2 TTS makes it hard to follow due to its slow convergence and low robustness. To address these issues, we first model the input with ConvNeXt to refine the text representation, making it easy to align with the speech. We further propose an inference-time Sway Sampling strategy, which significantly improves our model's performance and efficiency. This sampling strategy for flow step can be easily applied to existing flow matching based models without retraining. Our design allows faster training and achieves an inference RTF of 0.15, which is greatly improved compared to state-of-the-art diffusion-based TTS models. Trained on a public 100K hours multilingual dataset, our Fairytaler Fakes Fluent and Faithful speech with Flow matching (F5-TTS) exhibits highly natural and expressive zero-shot ability, seamless code-switching capability, and speed control efficiency. Demo samples can be found at https://SWivid.github.io/F5-TTS. We release all code and checkpoints to promote community development. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.06885v2-abstract-full').style.display = 'none'; document.getElementById('2410.06885v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 15 October, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 9 October, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2410.06880">arXiv:2410.06880</a> <span> [<a href="https://arxiv.org/pdf/2410.06880">pdf</a>, <a href="https://arxiv.org/ps/2410.06880">ps</a>, <a href="https://arxiv.org/format/2410.06880">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Systems and Control">eess.SY</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Signal Processing">eess.SP</span> </div> </div> <p class="title is-5 mathjax"> Cooperative UAV-Relay based Satellite Aerial Ground Integrated Networks </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&query=Bhola"> Bhola</a>, <a href="/search/eess?searchtype=author&query=Chen%2C+Y">Yu-Jia Chen</a>, <a href="/search/eess?searchtype=author&query=Balakrishnan%2C+A">Ashutosh Balakrishnan</a>, <a href="/search/eess?searchtype=author&query=De%2C+S">Swades De</a>, <a href="/search/eess?searchtype=author&query=Wang%2C+L">Li-Chun Wang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2410.06880v1-abstract-short" style="display: inline;"> In the post-fifth generation (5G) era, escalating user quality of service (QoS) strains terrestrial network capacity, especially in urban areas with dynamic traffic distributions. This paper introduces a novel cooperative unmanned aerial vehicle relay-based deployment (CUD) framework in satellite air-ground integrated networks (SAGIN). The CUD strategy deploys an unmanned aerial vehicle-based rela… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.06880v1-abstract-full').style.display = 'inline'; document.getElementById('2410.06880v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2410.06880v1-abstract-full" style="display: none;"> In the post-fifth generation (5G) era, escalating user quality of service (QoS) strains terrestrial network capacity, especially in urban areas with dynamic traffic distributions. This paper introduces a novel cooperative unmanned aerial vehicle relay-based deployment (CUD) framework in satellite air-ground integrated networks (SAGIN). The CUD strategy deploys an unmanned aerial vehicle-based relay (UAVr) in an amplify-andforward (AF) mode to enhance user QoS when terrestrial base stations fall short of network capacity. By combining low earth orbit (LEO) satellite and UAVr signals using cooperative diversity, the CUD framework enhances the signal to noise ratio (SNR) at the user. Comparative evaluations against existing frameworks reveal performance improvements, demonstrating the effectiveness of the CUD framework in addressing the evolving demands of next-generation networks. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.06880v1-abstract-full').style.display = 'none'; document.getElementById('2410.06880v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 9 October, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">5 pages, 3 figures, to appear in IEEE 100th Vehicular Technology Conference (VTC2024-Fall)</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2410.04702">arXiv:2410.04702</a> <span> [<a href="https://arxiv.org/pdf/2410.04702">pdf</a>, <a href="https://arxiv.org/format/2410.04702">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Sound">cs.SD</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Audio and Speech Processing">eess.AS</span> </div> </div> <p class="title is-5 mathjax"> Demo of Zero-Shot Guitar Amplifier Modelling: Enhancing Modeling with Hyper Neural Networks </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&query=Chen%2C+Y">Yu-Hua Chen</a>, <a href="/search/eess?searchtype=author&query=Cheng%2C+Y">Yuan-Chiao Cheng</a>, <a href="/search/eess?searchtype=author&query=Yeh%2C+Y">Yen-Tung Yeh</a>, <a href="/search/eess?searchtype=author&query=Wu%2C+J">Jui-Te Wu</a>, <a href="/search/eess?searchtype=author&query=Ho%2C+Y">Yu-Hsiang Ho</a>, <a href="/search/eess?searchtype=author&query=Jang%2C+J+R">Jyh-Shing Roger Jang</a>, <a href="/search/eess?searchtype=author&query=Yang%2C+Y">Yi-Hsuan Yang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2410.04702v1-abstract-short" style="display: inline;"> Electric guitar tone modeling typically focuses on the non-linear transformation from clean to amplifier-rendered audio. Traditional methods rely on one-to-one mappings, incorporating device parameters into neural models to replicate specific amplifiers. However, these methods are limited by the need for specific training data. In this paper, we adapt a model based on the previous work, which leve… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.04702v1-abstract-full').style.display = 'inline'; document.getElementById('2410.04702v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2410.04702v1-abstract-full" style="display: none;"> Electric guitar tone modeling typically focuses on the non-linear transformation from clean to amplifier-rendered audio. Traditional methods rely on one-to-one mappings, incorporating device parameters into neural models to replicate specific amplifiers. However, these methods are limited by the need for specific training data. In this paper, we adapt a model based on the previous work, which leverages a tone embedding encoder and a feature wise linear modulation (FiLM) condition method. In this work, we altered conditioning method using a hypernetwork-based gated convolutional network (GCN) to generate audio that blends clean input with the tone characteristics of reference audio. By extending the training data to cover a wider variety of amplifier tones, our model is able to capture a broader range of tones. Additionally, we developed a real-time plugin to demonstrate the system's practical application, allowing users to experience its performance interactively. Our results indicate that the proposed system achieves superior tone modeling versatility compared to traditional methods. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.04702v1-abstract-full').style.display = 'none'; document.getElementById('2410.04702v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 6 October, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">demo of the ISMIR paper</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2410.04414">arXiv:2410.04414</a> <span> [<a href="https://arxiv.org/pdf/2410.04414">pdf</a>, <a href="https://arxiv.org/format/2410.04414">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Information Theory">cs.IT</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Signal Processing">eess.SP</span> </div> </div> <p class="title is-5 mathjax"> Spatial Multiplexing Oriented Channel Reconfiguration in Multi-IRS Aided MIMO Systems </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&query=Chen%2C+Y">Yuxuan Chen</a>, <a href="/search/eess?searchtype=author&query=Wu%2C+Q">Qingqing Wu</a>, <a href="/search/eess?searchtype=author&query=Chen%2C+G">Guangji Chen</a>, <a href="/search/eess?searchtype=author&query=Chen%2C+W">Wen Chen</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2410.04414v1-abstract-short" style="display: inline;"> Spatial multiplexing plays a significant role in improving the capacity of multiple-input multiple-output (MIMO) communication systems. To improve the spectral efficiency (SE) of a point-to-point MIMO system, we exploit the channel reconfiguration capabilities provided by multiple intelligent reflecting surfaces (IRSs) to enhance the spatial multiplexing. Unlike most existing works, we address bot… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.04414v1-abstract-full').style.display = 'inline'; document.getElementById('2410.04414v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2410.04414v1-abstract-full" style="display: none;"> Spatial multiplexing plays a significant role in improving the capacity of multiple-input multiple-output (MIMO) communication systems. To improve the spectral efficiency (SE) of a point-to-point MIMO system, we exploit the channel reconfiguration capabilities provided by multiple intelligent reflecting surfaces (IRSs) to enhance the spatial multiplexing. Unlike most existing works, we address both the issues of the IRSs placement and elements allocation. To this end, we first introduce an orthogonal placement strategy to mitigate channel correlation, thereby enabling interference-free multi-stream transmission. Subsequently, we propose a successive convex approximation (SCA)-based approach to jointly optimize the IRS elements and power allocation. Our theoretical analysis unveils that equal IRS elements/power allocation scheme becomes asymptotically optimal as the number of IRS elements and transmit power tend to be infinite. Numerical results demonstrate that when the total number of IRS elements or the power exceeds a certain threshold, a multi-IRS assisted system outperforms a single IRS configuration. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.04414v1-abstract-full').style.display = 'none'; document.getElementById('2410.04414v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 6 October, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2410.03898">arXiv:2410.03898</a> <span> [<a href="https://arxiv.org/pdf/2410.03898">pdf</a>, <a href="https://arxiv.org/format/2410.03898">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Image and Video Processing">eess.IV</span> </div> </div> <p class="title is-5 mathjax"> On the Rate-Distortion-Complexity Trade-offs of Neural Video Coding </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&query=Chen%2C+Y">Yi-Hsin Chen</a>, <a href="/search/eess?searchtype=author&query=Ho%2C+K">Kuan-Wei Ho</a>, <a href="/search/eess?searchtype=author&query=Benjak%2C+M">Martin Benjak</a>, <a href="/search/eess?searchtype=author&query=Ostermann%2C+J">J枚rn Ostermann</a>, <a href="/search/eess?searchtype=author&query=Peng%2C+W">Wen-Hsiao Peng</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2410.03898v1-abstract-short" style="display: inline;"> This paper aims to delve into the rate-distortion-complexity trade-offs of modern neural video coding. Recent years have witnessed much research effort being focused on exploring the full potential of neural video coding. Conditional autoencoders have emerged as the mainstream approach to efficient neural video coding. The central theme of conditional autoencoders is to leverage both spatial and t… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.03898v1-abstract-full').style.display = 'inline'; document.getElementById('2410.03898v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2410.03898v1-abstract-full" style="display: none;"> This paper aims to delve into the rate-distortion-complexity trade-offs of modern neural video coding. Recent years have witnessed much research effort being focused on exploring the full potential of neural video coding. Conditional autoencoders have emerged as the mainstream approach to efficient neural video coding. The central theme of conditional autoencoders is to leverage both spatial and temporal information for better conditional coding. However, a recent study indicates that conditional coding may suffer from information bottlenecks, potentially performing worse than traditional residual coding. To address this issue, recent conditional coding methods incorporate a large number of high-resolution features as the condition signal, leading to a considerable increase in the number of multiply-accumulate operations, memory footprint, and model size. Taking DCVC as the common code base, we investigate how the newly proposed conditional residual coding, an emerging new school of thought, and its variants may strike a better balance among rate, distortion, and complexity. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.03898v1-abstract-full').style.display = 'none'; document.getElementById('2410.03898v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 4 October, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Accepted to MMSP 2024</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2410.03134">arXiv:2410.03134</a> <span> [<a href="https://arxiv.org/pdf/2410.03134">pdf</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Signal Processing">eess.SP</span> </div> </div> <p class="title is-5 mathjax"> Remaining Useful Life Prediction: A Study on Multidimensional Industrial Signal Processing and Efficient Transfer Learning Based on Large Language Models </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&query=Chen%2C+Y">Yan Chen</a>, <a href="/search/eess?searchtype=author&query=Liu%2C+C">Cheng Liu</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2410.03134v1-abstract-short" style="display: inline;"> Remaining useful life (RUL) prediction is crucial for maintaining modern industrial systems, where equipment reliability and operational safety are paramount. Traditional methods, based on small-scale deep learning or physical/statistical models, often struggle with complex, multidimensional sensor data and varying operating conditions, limiting their generalization capabilities. To address these… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.03134v1-abstract-full').style.display = 'inline'; document.getElementById('2410.03134v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2410.03134v1-abstract-full" style="display: none;"> Remaining useful life (RUL) prediction is crucial for maintaining modern industrial systems, where equipment reliability and operational safety are paramount. Traditional methods, based on small-scale deep learning or physical/statistical models, often struggle with complex, multidimensional sensor data and varying operating conditions, limiting their generalization capabilities. To address these challenges, this paper introduces an innovative regression framework utilizing large language models (LLMs) for RUL prediction. By leveraging the modeling power of LLMs pre-trained on corpus data, the proposed model can effectively capture complex temporal dependencies and improve prediction accuracy. Extensive experiments on the Turbofan engine's RUL prediction task show that the proposed model surpasses state-of-the-art (SOTA) methods on the challenging FD002 and FD004 subsets and achieves near-SOTA results on the other subsets. Notably, different from previous research, our framework uses the same sliding window length and all sensor signals for all subsets, demonstrating strong consistency and generalization. Moreover, transfer learning experiments reveal that with minimal target domain data for fine-tuning, the model outperforms SOTA methods trained on full target domain data. This research highlights the significant potential of LLMs in industrial signal processing and RUL prediction, offering a forward-looking solution for health management in future intelligent industrial systems. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.03134v1-abstract-full').style.display = 'none'; document.getElementById('2410.03134v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 4 October, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2410.02236">arXiv:2410.02236</a> <span> [<a href="https://arxiv.org/pdf/2410.02236">pdf</a>, <a href="https://arxiv.org/format/2410.02236">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Systems and Control">eess.SY</span> </div> </div> <p class="title is-5 mathjax"> C-MORL: Multi-Objective Reinforcement Learning through Efficient Discovery of Pareto Front </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&query=Liu%2C+R">Ruohong Liu</a>, <a href="/search/eess?searchtype=author&query=Pan%2C+Y">Yuxin Pan</a>, <a href="/search/eess?searchtype=author&query=Xu%2C+L">Linjie Xu</a>, <a href="/search/eess?searchtype=author&query=Song%2C+L">Lei Song</a>, <a href="/search/eess?searchtype=author&query=You%2C+P">Pengcheng You</a>, <a href="/search/eess?searchtype=author&query=Chen%2C+Y">Yize Chen</a>, <a href="/search/eess?searchtype=author&query=Bian%2C+J">Jiang Bian</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2410.02236v1-abstract-short" style="display: inline;"> Multi-objective reinforcement learning (MORL) excels at handling rapidly changing preferences in tasks that involve multiple criteria, even for unseen preferences. However, previous dominating MORL methods typically generate a fixed policy set or preference-conditioned policy through multiple training iterations exclusively for sampled preference vectors, and cannot ensure the efficient discovery… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.02236v1-abstract-full').style.display = 'inline'; document.getElementById('2410.02236v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2410.02236v1-abstract-full" style="display: none;"> Multi-objective reinforcement learning (MORL) excels at handling rapidly changing preferences in tasks that involve multiple criteria, even for unseen preferences. However, previous dominating MORL methods typically generate a fixed policy set or preference-conditioned policy through multiple training iterations exclusively for sampled preference vectors, and cannot ensure the efficient discovery of the Pareto front. Furthermore, integrating preferences into the input of policy or value functions presents scalability challenges, in particular as the dimension of the state and preference space grow, which can complicate the learning process and hinder the algorithm's performance on more complex tasks. To address these issues, we propose a two-stage Pareto front discovery algorithm called Constrained MORL (C-MORL), which serves as a seamless bridge between constrained policy optimization and MORL. Concretely, a set of policies is trained in parallel in the initialization stage, with each optimized towards its individual preference over the multiple objectives. Then, to fill the remaining vacancies in the Pareto front, the constrained optimization steps are employed to maximize one objective while constraining the other objectives to exceed a predefined threshold. Empirically, compared to recent advancements in MORL methods, our algorithm achieves more consistent and superior performances in terms of hypervolume, expected utility, and sparsity on both discrete and continuous control tasks, especially with numerous objectives (up to nine objectives in our experiments). <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.02236v1-abstract-full').style.display = 'none'; document.getElementById('2410.02236v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 3 October, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">27 pages, 8 figues. In Submission to a conference</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2410.02231">arXiv:2410.02231</a> <span> [<a href="https://arxiv.org/pdf/2410.02231">pdf</a>, <a href="https://arxiv.org/format/2410.02231">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Systems and Control">eess.SY</span> </div> </div> <p class="title is-5 mathjax"> SEAL: SEmantic-Augmented Imitation Learning via Language Model </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&query=Gu%2C+C">Chengyang Gu</a>, <a href="/search/eess?searchtype=author&query=Pan%2C+Y">Yuxin Pan</a>, <a href="/search/eess?searchtype=author&query=Bai%2C+H">Haotian Bai</a>, <a href="/search/eess?searchtype=author&query=Xiong%2C+H">Hui Xiong</a>, <a href="/search/eess?searchtype=author&query=Chen%2C+Y">Yize Chen</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2410.02231v1-abstract-short" style="display: inline;"> Hierarchical Imitation Learning (HIL) is a promising approach for tackling long-horizon decision-making tasks. While it is a challenging task due to the lack of detailed supervisory labels for sub-goal learning, and reliance on hundreds to thousands of expert demonstrations. In this work, we introduce SEAL, a novel framework that leverages Large Language Models (LLMs)'s powerful semantic and world… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.02231v1-abstract-full').style.display = 'inline'; document.getElementById('2410.02231v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2410.02231v1-abstract-full" style="display: none;"> Hierarchical Imitation Learning (HIL) is a promising approach for tackling long-horizon decision-making tasks. While it is a challenging task due to the lack of detailed supervisory labels for sub-goal learning, and reliance on hundreds to thousands of expert demonstrations. In this work, we introduce SEAL, a novel framework that leverages Large Language Models (LLMs)'s powerful semantic and world knowledge for both specifying sub-goal space and pre-labeling states to semantically meaningful sub-goal representations without prior knowledge of task hierarchies. SEAL employs a dual-encoder structure, combining supervised LLM-guided sub-goal learning with unsupervised Vector Quantization (VQ) for more robust sub-goal representations. Additionally, SEAL incorporates a transition-augmented low-level planner for improved adaptation to sub-goal transitions. Our experiments demonstrate that SEAL outperforms state-of-the-art HIL methods and LLM-based planning approaches, particularly in settings with small expert datasets and complex long-horizon tasks. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.02231v1-abstract-full').style.display = 'none'; document.getElementById('2410.02231v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 3 October, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">18 pages, 5 figures, in submission</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2410.02107">arXiv:2410.02107</a> <span> [<a href="https://arxiv.org/pdf/2410.02107">pdf</a>, <a href="https://arxiv.org/format/2410.02107">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Systems and Control">eess.SY</span> </div> </div> <p class="title is-5 mathjax"> Safety Verification of Stochastic Systems: A Set-Erosion Approach </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&query=Liu%2C+Z">Zishun Liu</a>, <a href="/search/eess?searchtype=author&query=Jafarpour%2C+S">Saber Jafarpour</a>, <a href="/search/eess?searchtype=author&query=Chen%2C+Y">Yongxin Chen</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2410.02107v1-abstract-short" style="display: inline;"> We study the safety verification problem for discrete-time stochastic systems. We propose an approach for safety verification termed set-erosion strategy that verifies the safety of a stochastic system on a safe set through the safety of its associated deterministic system on an eroded subset. The amount of erosion is captured by the probabilistic bound on the distance between stochastic trajector… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.02107v1-abstract-full').style.display = 'inline'; document.getElementById('2410.02107v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2410.02107v1-abstract-full" style="display: none;"> We study the safety verification problem for discrete-time stochastic systems. We propose an approach for safety verification termed set-erosion strategy that verifies the safety of a stochastic system on a safe set through the safety of its associated deterministic system on an eroded subset. The amount of erosion is captured by the probabilistic bound on the distance between stochastic trajectories and their associated deterministic counterpart. Building on our recent work [1], we establish a sharp probabilistic bound on this distance. Combining this bound with the set-erosion strategy, we establish a general framework for the safety verification of stochastic systems. Our method is flexible and can work effectively with any deterministic safety verification techniques. We exemplify our method by incorporating barrier functions designed for deterministic safety verification, obtaining barrier certificates much tighter than existing results. Numerical experiments are conducted to demonstrate the efficacy and superiority of our method. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.02107v1-abstract-full').style.display = 'none'; document.getElementById('2410.02107v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 2 October, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2410.01591">arXiv:2410.01591</a> <span> [<a href="https://arxiv.org/pdf/2410.01591">pdf</a>, <a href="https://arxiv.org/format/2410.01591">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Image and Video Processing">eess.IV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> Imaging foundation model for universal enhancement of non-ideal measurement CT </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&query=Liu%2C+Y">Yuxin Liu</a>, <a href="/search/eess?searchtype=author&query=Ge%2C+R">Rongjun Ge</a>, <a href="/search/eess?searchtype=author&query=He%2C+Y">Yuting He</a>, <a href="/search/eess?searchtype=author&query=Wu%2C+Z">Zhan Wu</a>, <a href="/search/eess?searchtype=author&query=You%2C+C">Chenyu You</a>, <a href="/search/eess?searchtype=author&query=Li%2C+S">Shuo Li</a>, <a href="/search/eess?searchtype=author&query=Chen%2C+Y">Yang Chen</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2410.01591v1-abstract-short" style="display: inline;"> Non-ideal measurement computed tomography (NICT), which sacrifices optimal imaging standards for new advantages in CT imaging, is expanding the clinical application scope of CT images. However, with the reduction of imaging standards, the image quality has also been reduced, extremely limiting the clinical acceptability. Although numerous studies have demonstrated the feasibility of deep learning… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.01591v1-abstract-full').style.display = 'inline'; document.getElementById('2410.01591v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2410.01591v1-abstract-full" style="display: none;"> Non-ideal measurement computed tomography (NICT), which sacrifices optimal imaging standards for new advantages in CT imaging, is expanding the clinical application scope of CT images. However, with the reduction of imaging standards, the image quality has also been reduced, extremely limiting the clinical acceptability. Although numerous studies have demonstrated the feasibility of deep learning for the NICT enhancement in specific scenarios, their high data cost and limited generalizability have become large obstacles. The recent research on the foundation model has brought new opportunities for building a universal NICT enhancement model - bridging the image quality degradation with minimal data cost. However, owing to the challenges in the collection of large pre-training datasets and the compatibility of data variation, no success has been reported. In this paper, we propose a multi-scale integrated Transformer AMPlifier (TAMP), the first imaging foundation model for universal NICT enhancement. It has been pre-trained on a large-scale physical-driven simulation dataset with 3.6 million NICT-ICT image pairs, and is able to directly generalize to the NICT enhancement tasks with various non-ideal settings and body regions. Via the adaptation with few data, it can further achieve professional performance in real-world specific scenarios. Our extensive experiments have demonstrated that the proposed TAMP has significant potential for promoting the exploration and application of NICT and serving a wider range of medical scenarios. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.01591v1-abstract-full').style.display = 'none'; document.getElementById('2410.01591v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 2 October, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2410.01098">arXiv:2410.01098</a> <span> [<a href="https://arxiv.org/pdf/2410.01098">pdf</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Image and Video Processing">eess.IV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Systems and Control">eess.SY</span> </div> </div> <p class="title is-5 mathjax"> Generative AI Application for Building Industry </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&query=Wan%2C+H">Hanlong Wan</a>, <a href="/search/eess?searchtype=author&query=Zhang%2C+J">Jian Zhang</a>, <a href="/search/eess?searchtype=author&query=Chen%2C+Y">Yan Chen</a>, <a href="/search/eess?searchtype=author&query=Xu%2C+W">Weili Xu</a>, <a href="/search/eess?searchtype=author&query=Feng%2C+F">Fan Feng</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2410.01098v1-abstract-short" style="display: inline;"> This paper investigates the transformative potential of generative AI technologies, particularly large language models (LLMs), within the building industry. By leveraging these advanced AI tools, the study explores their application across key areas such as energy code compliance, building design optimization, and workforce training. The research highlights how LLMs can automate labor-intensive pr… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.01098v1-abstract-full').style.display = 'inline'; document.getElementById('2410.01098v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2410.01098v1-abstract-full" style="display: none;"> This paper investigates the transformative potential of generative AI technologies, particularly large language models (LLMs), within the building industry. By leveraging these advanced AI tools, the study explores their application across key areas such as energy code compliance, building design optimization, and workforce training. The research highlights how LLMs can automate labor-intensive processes, significantly improving efficiency, accuracy, and safety in building practices. The paper also addresses the challenges associated with interpreting complex visual and textual data in architectural plans and regulatory codes, proposing innovative solutions to enhance AI-driven compliance checking and design processes. Additionally, the study considers the broader implications of AI integration, including the development of AI-powered tools for comprehensive code compliance across various regulatory domains and the potential for AI to revolutionize workforce training through realistic simulations. This paper provides a comprehensive analysis of the current capabilities of generative AI in the building industry while outlining future directions for research and development, aiming to pave the way for smarter, more sustainable, and responsive construction practices. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.01098v1-abstract-full').style.display = 'none'; document.getElementById('2410.01098v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 1 October, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">28 pages, 11 figures, 4 tables</span> </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Report number:</span> PNNL-SA-203362 </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2409.19420">arXiv:2409.19420</a> <span> [<a href="https://arxiv.org/pdf/2409.19420">pdf</a>, <a href="https://arxiv.org/format/2409.19420">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Image and Video Processing">eess.IV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> Multi-sensor Learning Enables Information Transfer across Different Sensory Data and Augments Multi-modality Imaging </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&query=Zhu%2C+L">Lingting Zhu</a>, <a href="/search/eess?searchtype=author&query=Chen%2C+Y">Yizheng Chen</a>, <a href="/search/eess?searchtype=author&query=Liu%2C+L">Lianli Liu</a>, <a href="/search/eess?searchtype=author&query=Xing%2C+L">Lei Xing</a>, <a href="/search/eess?searchtype=author&query=Yu%2C+L">Lequan Yu</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2409.19420v1-abstract-short" style="display: inline;"> Multi-modality imaging is widely used in clinical practice and biomedical research to gain a comprehensive understanding of an imaging subject. Currently, multi-modality imaging is accomplished by post hoc fusion of independently reconstructed images under the guidance of mutual information or spatially registered hardware, which limits the accuracy and utility of multi-modality imaging. Here, we… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2409.19420v1-abstract-full').style.display = 'inline'; document.getElementById('2409.19420v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2409.19420v1-abstract-full" style="display: none;"> Multi-modality imaging is widely used in clinical practice and biomedical research to gain a comprehensive understanding of an imaging subject. Currently, multi-modality imaging is accomplished by post hoc fusion of independently reconstructed images under the guidance of mutual information or spatially registered hardware, which limits the accuracy and utility of multi-modality imaging. Here, we investigate a data-driven multi-modality imaging (DMI) strategy for synergetic imaging of CT and MRI. We reveal two distinct types of features in multi-modality imaging, namely intra- and inter-modality features, and present a multi-sensor learning (MSL) framework to utilize the crossover inter-modality features for augmented multi-modality imaging. The MSL imaging approach breaks down the boundaries of traditional imaging modalities and allows for optimal hybridization of CT and MRI, which maximizes the use of sensory data. We showcase the effectiveness of our DMI strategy through synergetic CT-MRI brain imaging. The principle of DMI is quite general and holds enormous potential for various DMI applications across disciplines. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2409.19420v1-abstract-full').style.display = 'none'; document.getElementById('2409.19420v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 28 September, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> September 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">18 pages, 14 figures. Accepted by IEEE Transactions on Pattern Analysis and Machine Intelligence</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2409.18680">arXiv:2409.18680</a> <span> [<a href="https://arxiv.org/pdf/2409.18680">pdf</a>, <a href="https://arxiv.org/format/2409.18680">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Sound">cs.SD</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Multimedia">cs.MM</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Audio and Speech Processing">eess.AS</span> </div> </div> <p class="title is-5 mathjax"> Beyond Single-Audio: Advancing Multi-Audio Processing in Audio Large Language Models </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&query=Chen%2C+Y">Yiming Chen</a>, <a href="/search/eess?searchtype=author&query=Yue%2C+X">Xianghu Yue</a>, <a href="/search/eess?searchtype=author&query=Gao%2C+X">Xiaoxue Gao</a>, <a href="/search/eess?searchtype=author&query=Zhang%2C+C">Chen Zhang</a>, <a href="/search/eess?searchtype=author&query=D%27Haro%2C+L+F">Luis Fernando D'Haro</a>, <a href="/search/eess?searchtype=author&query=Tan%2C+R+T">Robby T. Tan</a>, <a href="/search/eess?searchtype=author&query=Li%2C+H">Haizhou Li</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2409.18680v3-abstract-short" style="display: inline;"> Various audio-LLMs (ALLMs) have been explored recently for tackling different audio tasks simultaneously using a single, unified model. While existing evaluations of ALLMs primarily focus on single-audio tasks, real-world applications often involve processing multiple audio streams simultaneously. To bridge this gap, we propose the first multi-audio evaluation (MAE) benchmark that consists of 20 d… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2409.18680v3-abstract-full').style.display = 'inline'; document.getElementById('2409.18680v3-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2409.18680v3-abstract-full" style="display: none;"> Various audio-LLMs (ALLMs) have been explored recently for tackling different audio tasks simultaneously using a single, unified model. While existing evaluations of ALLMs primarily focus on single-audio tasks, real-world applications often involve processing multiple audio streams simultaneously. To bridge this gap, we propose the first multi-audio evaluation (MAE) benchmark that consists of 20 datasets from 11 multi-audio tasks encompassing both speech and sound scenarios. Comprehensive experiments on MAE demonstrate that the existing ALLMs, while being powerful in comprehending primary audio elements in individual audio inputs, struggling to handle multi-audio scenarios. To this end, we propose a novel multi-audio-LLM (MALLM) to capture audio context among multiple similar audios using discriminative learning on our proposed synthetic data. The results demonstrate that the proposed MALLM outperforms all baselines and achieves high data efficiency using synthetic data without requiring human annotations. The proposed MALLM opens the door for ALLMs towards multi-audio processing era and brings us closer to replicating human auditory capabilities in machines. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2409.18680v3-abstract-full').style.display = 'none'; document.getElementById('2409.18680v3-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 6 November, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 27 September, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> September 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">EMNLP24 Findings. Data available at https://github.com/MatthewCYM/MALLM</span> </p> </li> </ol> <nav class="pagination is-small is-centered breathe-horizontal" role="navigation" aria-label="pagination"> <a href="" class="pagination-previous is-invisible">Previous </a> <a href="/search/?searchtype=author&query=Chen%2C+Y&start=50" class="pagination-next" >Next </a> <ul class="pagination-list"> <li> <a href="/search/?searchtype=author&query=Chen%2C+Y&start=0" class="pagination-link is-current" aria-label="Goto page 1">1 </a> </li> <li> <a href="/search/?searchtype=author&query=Chen%2C+Y&start=50" class="pagination-link " aria-label="Page 2" aria-current="page">2 </a> </li> <li> <a href="/search/?searchtype=author&query=Chen%2C+Y&start=100" class="pagination-link " aria-label="Page 3" aria-current="page">3 </a> </li> <li> <a href="/search/?searchtype=author&query=Chen%2C+Y&start=150" class="pagination-link " aria-label="Page 4" aria-current="page">4 </a> </li> <li> <a href="/search/?searchtype=author&query=Chen%2C+Y&start=200" class="pagination-link " aria-label="Page 5" aria-current="page">5 </a> </li> <li><span class="pagination-ellipsis">…</span></li> </ul> </nav> <div class="is-hidden-tablet"> <!-- feedback for mobile only --> <span class="help" style="display: inline-block;"><a href="https://github.com/arXiv/arxiv-search/releases">Search v0.5.6 released 2020-02-24</a> </span> </div> </div> </main> <footer> <div class="columns is-desktop" role="navigation" aria-label="Secondary"> <!-- MetaColumn 1 --> <div class="column"> <div class="columns"> <div class="column"> <ul class="nav-spaced"> <li><a href="https://info.arxiv.org/about">About</a></li> <li><a href="https://info.arxiv.org/help">Help</a></li> </ul> </div> <div class="column"> <ul class="nav-spaced"> <li> <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 512 512" class="icon filter-black" role="presentation"><title>contact arXiv</title><desc>Click here to contact arXiv</desc><path d="M502.3 190.8c3.9-3.1 9.7-.2 9.7 4.7V400c0 26.5-21.5 48-48 48H48c-26.5 0-48-21.5-48-48V195.6c0-5 5.7-7.8 9.7-4.7 22.4 17.4 52.1 39.5 154.1 113.6 21.1 15.4 56.7 47.8 92.2 47.6 35.7.3 72-32.8 92.3-47.6 102-74.1 131.6-96.3 154-113.7zM256 320c23.2.4 56.6-29.2 73.4-41.4 132.7-96.3 142.8-104.7 173.4-128.7 5.8-4.5 9.2-11.5 9.2-18.9v-19c0-26.5-21.5-48-48-48H48C21.5 64 0 85.5 0 112v19c0 7.4 3.4 14.3 9.2 18.9 30.6 23.9 40.7 32.4 173.4 128.7 16.8 12.2 50.2 41.8 73.4 41.4z"/></svg> <a href="https://info.arxiv.org/help/contact.html"> Contact</a> </li> <li> <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 512 512" class="icon filter-black" role="presentation"><title>subscribe to arXiv mailings</title><desc>Click here to subscribe</desc><path d="M476 3.2L12.5 270.6c-18.1 10.4-15.8 35.6 2.2 43.2L121 358.4l287.3-253.2c5.5-4.9 13.3 2.6 8.6 8.3L176 407v80.5c0 23.6 28.5 32.9 42.5 15.8L282 426l124.6 52.2c14.2 6 30.4-2.9 33-18.2l72-432C515 7.8 493.3-6.8 476 3.2z"/></svg> <a href="https://info.arxiv.org/help/subscribe"> Subscribe</a> </li> </ul> </div> </div> </div> <!-- end MetaColumn 1 --> <!-- MetaColumn 2 --> <div class="column"> <div class="columns"> <div class="column"> <ul class="nav-spaced"> <li><a href="https://info.arxiv.org/help/license/index.html">Copyright</a></li> <li><a href="https://info.arxiv.org/help/policies/privacy_policy.html">Privacy Policy</a></li> </ul> </div> <div class="column sorry-app-links"> <ul class="nav-spaced"> <li><a href="https://info.arxiv.org/help/web_accessibility.html">Web Accessibility Assistance</a></li> <li> <p class="help"> <a class="a11y-main-link" href="https://status.arxiv.org" target="_blank">arXiv Operational Status <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 256 512" class="icon filter-dark_grey" role="presentation"><path d="M224.3 273l-136 136c-9.4 9.4-24.6 9.4-33.9 0l-22.6-22.6c-9.4-9.4-9.4-24.6 0-33.9l96.4-96.4-96.4-96.4c-9.4-9.4-9.4-24.6 0-33.9L54.3 103c9.4-9.4 24.6-9.4 33.9 0l136 136c9.5 9.4 9.5 24.6.1 34z"/></svg></a><br> Get status notifications via <a class="is-link" href="https://subscribe.sorryapp.com/24846f03/email/new" target="_blank"><svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 512 512" class="icon filter-black" role="presentation"><path d="M502.3 190.8c3.9-3.1 9.7-.2 9.7 4.7V400c0 26.5-21.5 48-48 48H48c-26.5 0-48-21.5-48-48V195.6c0-5 5.7-7.8 9.7-4.7 22.4 17.4 52.1 39.5 154.1 113.6 21.1 15.4 56.7 47.8 92.2 47.6 35.7.3 72-32.8 92.3-47.6 102-74.1 131.6-96.3 154-113.7zM256 320c23.2.4 56.6-29.2 73.4-41.4 132.7-96.3 142.8-104.7 173.4-128.7 5.8-4.5 9.2-11.5 9.2-18.9v-19c0-26.5-21.5-48-48-48H48C21.5 64 0 85.5 0 112v19c0 7.4 3.4 14.3 9.2 18.9 30.6 23.9 40.7 32.4 173.4 128.7 16.8 12.2 50.2 41.8 73.4 41.4z"/></svg>email</a> or <a class="is-link" href="https://subscribe.sorryapp.com/24846f03/slack/new" target="_blank"><svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 448 512" class="icon filter-black" role="presentation"><path d="M94.12 315.1c0 25.9-21.16 47.06-47.06 47.06S0 341 0 315.1c0-25.9 21.16-47.06 47.06-47.06h47.06v47.06zm23.72 0c0-25.9 21.16-47.06 47.06-47.06s47.06 21.16 47.06 47.06v117.84c0 25.9-21.16 47.06-47.06 47.06s-47.06-21.16-47.06-47.06V315.1zm47.06-188.98c-25.9 0-47.06-21.16-47.06-47.06S139 32 164.9 32s47.06 21.16 47.06 47.06v47.06H164.9zm0 23.72c25.9 0 47.06 21.16 47.06 47.06s-21.16 47.06-47.06 47.06H47.06C21.16 243.96 0 222.8 0 196.9s21.16-47.06 47.06-47.06H164.9zm188.98 47.06c0-25.9 21.16-47.06 47.06-47.06 25.9 0 47.06 21.16 47.06 47.06s-21.16 47.06-47.06 47.06h-47.06V196.9zm-23.72 0c0 25.9-21.16 47.06-47.06 47.06-25.9 0-47.06-21.16-47.06-47.06V79.06c0-25.9 21.16-47.06 47.06-47.06 25.9 0 47.06 21.16 47.06 47.06V196.9zM283.1 385.88c25.9 0 47.06 21.16 47.06 47.06 0 25.9-21.16 47.06-47.06 47.06-25.9 0-47.06-21.16-47.06-47.06v-47.06h47.06zm0-23.72c-25.9 0-47.06-21.16-47.06-47.06 0-25.9 21.16-47.06 47.06-47.06h117.84c25.9 0 47.06 21.16 47.06 47.06 0 25.9-21.16 47.06-47.06 47.06H283.1z"/></svg>slack</a> </p> </li> </ul> </div> </div> </div> <!-- end MetaColumn 2 --> </div> </footer> <script src="https://static.arxiv.org/static/base/1.0.0a5/js/member_acknowledgement.js"></script> </body> </html>