<!-- CINXE.COM -->

<!DOCTYPE html> <html lang="en"> <head> <title>Electrical Engineering and Systems Science </title> <meta name="viewport" content="width=device-width, initial-scale=1"> <link rel="apple-touch-icon" sizes="180x180" href="/static/browse/0.3.4/images/icons/apple-touch-icon.png"> <link rel="icon" type="image/png" sizes="32x32" href="/static/browse/0.3.4/images/icons/favicon-32x32.png"> <link rel="icon" type="image/png" sizes="16x16" href="/static/browse/0.3.4/images/icons/favicon-16x16.png"> <link rel="manifest" href="/static/browse/0.3.4/images/icons/site.webmanifest"> <link rel="mask-icon" href="/static/browse/0.3.4/images/icons/safari-pinned-tab.svg" color="#5bbad5"> <meta name="msapplication-TileColor" content="#da532c"> <meta name="theme-color" content="#ffffff"> <link rel="stylesheet" type="text/css" media="screen" href="/static/browse/0.3.4/css/arXiv.css?v=20241206" /> <link rel="stylesheet" type="text/css" media="print" href="/static/browse/0.3.4/css/arXiv-print.css?v=20200611" /> <link rel="stylesheet" type="text/css" media="screen" href="/static/browse/0.3.4/css/browse_search.css" /> <script language="javascript" src="/static/browse/0.3.4/js/accordion.js" /></script> <script src="/static/browse/0.3.4/js/mathjaxToggle.min.js" type="text/javascript"></script> <script type="text/javascript" language="javascript">mathjaxToggle();</script> </head> <body class="with-cu-identity"> <div class="flex-wrap-footer"> <header> <a href="#content" class="is-sr-only">Skip to main content</a>  <div class="columns is-vcentered is-hidden-mobile" id="cu-identity"> <div class="column" id="cu-logo"> <a href="https://www.cornell.edu/"><img src="/static/browse/0.3.4/images/icons/cu/cornell-reduced-white-SMALL.svg" alt="Cornell University" /></a> </div><div class="column" id="support-ack"> <span id="support-ack-url">We gratefully acknowledge support from the Simons Foundation, <a href="https://info.arxiv.org/about/ourmembers.html">member institutions</a>, and all contributors.</span> 
<a href="https://info.arxiv.org/about/donate.html" class="btn-header-donate">Donate</a> </div> </div> <div id="header" class="is-hidden-mobile"> <a aria-hidden="true" tabindex="-1" href="/IgnoreMe"></a> <div class="header-breadcrumbs"> <a href="/"><img src="/static/browse/0.3.4/images/arxiv-logo-one-color-white.svg" alt="arxiv logo" style="height:40px;"/></a> <span>></span> <a href="/list/eess/recent">eess</a> </div> <div class="search-block level-right"> <form class="level-item mini-search" method="GET" action="https://arxiv.org/search"> <div class="field has-addons"> <div class="control"> <input class="input is-small" type="text" name="query" placeholder="Search..." aria-label="Search term or terms" /> <p class="help"><a href="https://info.arxiv.org/help">Help</a> | <a href="https://arxiv.org/search/advanced">Advanced Search</a></p> </div> <div class="control"> <div class="select is-small"> <select name="searchtype" aria-label="Field to search"> <option value="all" selected="selected">All fields</option> <option value="title">Title</option> <option value="author">Author</option> <option value="abstract">Abstract</option> <option value="comments">Comments</option> <option value="journal_ref">Journal reference</option> <option value="acm_class">ACM classification</option> <option value="msc_class">MSC classification</option> <option value="report_num">Report number</option> <option value="paper_id">arXiv identifier</option> <option value="doi">DOI</option> <option value="orcid">ORCID</option> <option value="author_id">arXiv author ID</option> <option value="help">Help pages</option> <option value="full_text">Full text</option> </select> </div> </div> <input type="hidden" name="source" value="header"> <button class="button is-small is-cul-darker">Search</button> </div> </form> </div> </div> <div class="mobile-header"> <div class="columns is-mobile"> <div class="column logo-arxiv"><a href="https://arxiv.org/"><img 
src="/static/browse/0.3.4/images/arxiv-logomark-small-white.svg" alt="arXiv logo" style="height:60px;" /></a></div> <div class="column logo-cornell"><a href="https://www.cornell.edu/"> <picture> <source media="(min-width: 501px)" srcset="/static/browse/0.3.4/images/icons/cu/cornell-reduced-white-SMALL.svg 400w" sizes="400w" /> <source srcset="/static/browse/0.3.4/images/icons/cu/cornell_seal_simple_black.svg 2x" /> <img src="/static/browse/0.3.4/images/icons/cu/cornell-reduced-white-SMALL.svg" alt="Cornell University Logo" /> </picture> </a></div> <div class="column nav" id="toggle-container" role="menubar"> <button class="toggle-control"><svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 512 512" class="icon filter-white"><title>open search</title><path d="M505 442.7L405.3 343c-4.5-4.5-10.6-7-17-7H372c27.6-35.3 44-79.7 44-128C416 93.1 322.9 0 208 0S0 93.1 0 208s93.1 208 208 208c48.3 0 92.7-16.4 128-44v16.3c0 6.4 2.5 12.5 7 17l99.7 99.7c9.4 9.4 24.6 9.4 33.9 0l28.3-28.3c9.4-9.4 9.4-24.6.1-34zM208 336c-70.7 0-128-57.2-128-128 0-70.7 57.2-128 128-128 70.7 0 128 57.2 128 128 0 70.7-57.2 128-128 128z"/></svg></button> <div class="mobile-toggle-block toggle-target"> <form class="mobile-search-form" method="GET" action="https://arxiv.org/search"> <div class="field has-addons"> <input class="input" type="text" name="query" placeholder="Search..." 
aria-label="Search term or terms" /> <input type="hidden" name="source" value="header"> <input type="hidden" name="searchtype" value="all"> <button class="button">GO</button> </div> </form> </div> <button class="toggle-control"><svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 448 512" class="icon filter-white" role="menu"><title>open navigation menu</title><path d="M16 132h416c8.837 0 16-7.163 16-16V76c0-8.837-7.163-16-16-16H16C7.163 60 0 67.163 0 76v40c0 8.837 7.163 16 16 16zm0 160h416c8.837 0 16-7.163 16-16v-40c0-8.837-7.163-16-16-16H16c-8.837 0-16 7.163-16 16v40c0 8.837 7.163 16 16 16zm0 160h416c8.837 0 16-7.163 16-16v-40c0-8.837-7.163-16-16-16H16c-8.837 0-16 7.163-16 16v40c0 8.837 7.163 16 16 16z"/></svg></button> <div class="mobile-toggle-block toggle-target"> <nav class="mobile-menu" aria-labelledby="mobilemenulabel"> <h2 id="mobilemenulabel">quick links</h2> <ul> <li><a href="https://arxiv.org/login">Login</a></li> <li><a href="https://info.arxiv.org/help">Help Pages</a></li> <li><a href="https://info.arxiv.org/about">About</a></li> </ul> </nav> </div> </div> </div> </div> </header> <main> <div id="content"> <div id='content-inner'> <div id='dlpage'> <h1>Electrical Engineering and Systems Science</h1> <ul> <li><a href="#item0">New submissions</a></li> <li><a href="#item27">Cross-lists</a></li> <li><a href="#item45">Replacements</a></li> </ul> <p>See <a id="recent-eess" aria-labelledby="recent-eess" href="/list/eess/recent">recent</a> articles</p> <h3>Showing new listings for Friday, 21 March 2025</h3> <div class='paging'>Total of 75 entries </div> <div class='morefewer'>Showing up to 2000 entries per page: <a href="/list/eess/new?skip=0&show=1000" rel="nofollow"> fewer</a> | <span style="color: #454545">more</span> | <span style="color: #454545">all</span> </div> <dl id='articles'> <h3>New submissions (showing 26 of 26 entries)</h3> <dt> <a name='item1'>[1]</a> <a href ="/abs/2503.15555" title="Abstract" id="2503.15555"> arXiv:2503.15555 </a> [<a 
href="/pdf/2503.15555" title="Download PDF" id="pdf-2503.15555" aria-labelledby="pdf-2503.15555">pdf</a>, <a href="https://arxiv.org/html/2503.15555v1" title="View HTML" id="html-2503.15555" aria-labelledby="html-2503.15555" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2503.15555" title="Other formats" id="oth-2503.15555" aria-labelledby="oth-2503.15555">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Whole-Body Image-to-Image Translation for a Virtual Scanner in a Healthcare Digital Twin </div> <div class='list-authors'><a href="https://arxiv.org/search/eess?searchtype=author&query=Guarrasi,+V">Valerio Guarrasi</a>, <a href="https://arxiv.org/search/eess?searchtype=author&query=Di+Feola,+F">Francesco Di Feola</a>, <a href="https://arxiv.org/search/eess?searchtype=author&query=Restivo,+R">Rebecca Restivo</a>, <a href="https://arxiv.org/search/eess?searchtype=author&query=Tronchin,+L">Lorenzo Tronchin</a>, <a href="https://arxiv.org/search/eess?searchtype=author&query=Soda,+P">Paolo Soda</a></div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Image and Video Processing (eess.IV)</span>; Artificial Intelligence (cs.AI) </div> <p class='mathjax'> Generating positron emission tomography (PET) images from computed tomography (CT) scans via deep learning offers a promising pathway to reduce radiation exposure and costs associated with PET imaging, improving patient care and accessibility to functional imaging. Whole-body image translation presents challenges due to anatomical heterogeneity, often limiting generalized models. We propose a framework that segments whole-body CT images into four regions-head, trunk, arms, and legs-and uses district-specific Generative Adversarial Networks (GANs) for tailored CT-to-PET translation. Synthetic PET images from each region are stitched together to reconstruct the whole-body scan. 
Comparisons with a baseline non-segmented GAN and experiments with Pix2Pix and CycleGAN architectures tested paired and unpaired scenarios. Quantitative evaluations at district, whole-body, and lesion levels demonstrated significant improvements with our district-specific GANs. Pix2Pix yielded superior metrics, ensuring precise, high-quality image synthesis. By addressing anatomical heterogeneity, this approach achieves state-of-the-art results in whole-body CT-to-PET translation. This methodology supports healthcare Digital Twins by enabling accurate virtual PET scans from CT data, creating virtual imaging representations to monitor, predict, and optimize health outcomes. </p> </div> </dd> <dt> <a name='item2'>[2]</a> <a href ="/abs/2503.15627" title="Abstract" id="2503.15627"> arXiv:2503.15627 </a> [<a href="/pdf/2503.15627" title="Download PDF" id="pdf-2503.15627" aria-labelledby="pdf-2503.15627">pdf</a>, <a href="https://arxiv.org/html/2503.15627v1" title="View HTML" id="html-2503.15627" aria-labelledby="html-2503.15627" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2503.15627" title="Other formats" id="oth-2503.15627" aria-labelledby="oth-2503.15627">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> A Speech Production Model for Radar: Connecting Speech Acoustics with Radar-Measured Vibrations </div> <div class='list-authors'><a href="https://arxiv.org/search/eess?searchtype=author&query=Lenz,+I">Isabella Lenz</a>, <a href="https://arxiv.org/search/eess?searchtype=author&query=Rong,+Y">Yu Rong</a>, <a href="https://arxiv.org/search/eess?searchtype=author&query=Bliss,+D">Daniel Bliss</a>, <a href="https://arxiv.org/search/eess?searchtype=author&query=Liss,+J">Julie Liss</a>, <a href="https://arxiv.org/search/eess?searchtype=author&query=Berisha,+V">Visar Berisha</a></div> <div class='list-comments mathjax'><span class='descriptor'>Comments:</span> 5 pages, 6 figure, InterSpeech 
Conference </div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Audio and Speech Processing (eess.AS)</span>; Signal Processing (eess.SP) </div> <p class='mathjax'> Millimeter Wave (mmWave) radar has emerged as a promising modality for speech sensing, offering advantages over traditional microphones. Prior works have demonstrated that radar captures motion signals related to vocal vibrations, but there is a gap in the understanding of the analytical connection between radar-measured vibrations and acoustic speech signals. We establish a mathematical framework linking radar-captured neck vibrations to speech acoustics. We derive an analytical relationship between neck surface displacements and speech. We use data from 66 human participants, and statistical spectral distance analysis to empirically assess the model. Our results show that the radar-measured signal aligns more closely with our model filtered vibration signal derived from speech than with raw speech itself. These findings provide a foundation for improved radar-based speech processing for applications in speech enhancement, coding, surveillance, and authentication. 
</p> </div> </dd> <dt> <a name='item3'>[3]</a> <a href ="/abs/2503.15674" title="Abstract" id="2503.15674"> arXiv:2503.15674 </a> [<a href="/pdf/2503.15674" title="Download PDF" id="pdf-2503.15674" aria-labelledby="pdf-2503.15674">pdf</a>, <a href="https://arxiv.org/html/2503.15674v1" title="View HTML" id="html-2503.15674" aria-labelledby="html-2503.15674" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2503.15674" title="Other formats" id="oth-2503.15674" aria-labelledby="oth-2503.15674">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Pervasive Sensing for Livestock Health and Activity Monitoring: Current Methods and Techniques </div> <div class='list-authors'><a href="https://arxiv.org/search/eess?searchtype=author&query=Shulkin,+J+D">Jeffrey D Shulkin</a>, <a href="https://arxiv.org/search/eess?searchtype=author&query=Vibhatasilpin,+A">Abhipol Vibhatasilpin</a>, <a href="https://arxiv.org/search/eess?searchtype=author&query=Adhana,+V">Vedant Adhana</a></div> <div class='list-comments mathjax'><span class='descriptor'>Comments:</span> 9 pages, 0 figures </div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Systems and Control (eess.SY)</span> </div> <p class='mathjax'> Pervasive sensing is transforming health and activity monitoring by enabling continuous and automated data collection through advanced sensing modalities. While extensive research has been conducted on human subjects, its application in livestock remains underexplored. In large-scale agriculture, real-time monitoring of biological signals and behavioral patterns can facilitate early disease detection, optimize feeding and breeding strategies, and ensure compliance with welfare standards. 
This survey examines key sensing technologies -- including structural vibration, radio frequency (RF), computer vision, and wearables -- highlighting their benefits and challenges in livestock monitoring. By comparing these approaches, we provide insights into their effectiveness, limitations, and potential for integration into modern smart farming systems. Finally, we discuss research gaps and future directions to advance pervasive sensing in livestock health and activity monitoring. </p> </div> </dd> <dt> <a name='item4'>[4]</a> <a href ="/abs/2503.15722" title="Abstract" id="2503.15722"> arXiv:2503.15722 </a> [<a href="/pdf/2503.15722" title="Download PDF" id="pdf-2503.15722" aria-labelledby="pdf-2503.15722">pdf</a>, <a href="https://arxiv.org/html/2503.15722v1" title="View HTML" id="html-2503.15722" aria-labelledby="html-2503.15722" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2503.15722" title="Other formats" id="oth-2503.15722" aria-labelledby="oth-2503.15722">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Leveraging MoE-based Large Language Model for Zero-Shot Multi-Task Semantic Communication </div> <div class='list-authors'><a href="https://arxiv.org/search/eess?searchtype=author&query=Huang,+S">Sin-Yu Huang</a>, <a href="https://arxiv.org/search/eess?searchtype=author&query=Liao,+R">Renjie Liao</a>, <a href="https://arxiv.org/search/eess?searchtype=author&query=Wong,+V+W">Vincent W.S. Wong</a></div> <div class='list-comments mathjax'><span class='descriptor'>Comments:</span> Accepted by ICC 2025 </div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Signal Processing (eess.SP)</span> </div> <p class='mathjax'> Multi-task semantic communication (SC) can reduce the computational resources in wireless systems since retraining is not required when switching between tasks. 
However, existing approaches typically rely on task-specific embeddings to identify the intended task, necessitating retraining the entire model when given a new task. Consequently, this drives the need for a multi-task SC system that can handle new tasks without additional training, known as zero-shot learning. Inspired by the superior zero-shot capabilities of large language models (LLMs), we leverage pre-trained instruction-tuned LLMs, referred to as fine-tuned language net (FLAN), to improve the generalization capability. We incorporate a mixture-of-experts (MoE) architecture in the FLAN model and propose MoE-FLAN-SC architecture for multi-task SC systems. Our proposed MoE-FLAN-SC architecture can further improve the performance of FLAN-T5 model without increasing the computational cost. Moreover, we design a multi-task feature extraction module (FEM) which can adaptively extract relevant features across various tasks given the provided features and signal-to-noise ratio (SNR). Simulation results show that our proposed MoE-FLAN-SC architecture outperforms three state-of-the-art models in terms of the average accuracy on four different unseen tasks. </p> </div> </dd> <dt> <a name='item5'>[5]</a> <a href ="/abs/2503.15734" title="Abstract" id="2503.15734"> arXiv:2503.15734 </a> [<a href="/pdf/2503.15734" title="Download PDF" id="pdf-2503.15734" aria-labelledby="pdf-2503.15734">pdf</a>, <a href="https://arxiv.org/html/2503.15734v1" title="View HTML" id="html-2503.15734" aria-labelledby="html-2503.15734" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2503.15734" title="Other formats" id="oth-2503.15734" aria-labelledby="oth-2503.15734">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Disturbance Observers for Robust Backup Control Barrier Functions </div> <div class='list-authors'><a href="https://arxiv.org/search/eess?searchtype=author&query=van+Wijk,+D+E">David E.J. 
van Wijk</a>, <a href="https://arxiv.org/search/eess?searchtype=author&query=Das,+E">Ersin Das</a>, <a href="https://arxiv.org/search/eess?searchtype=author&query=Alan,+A">Anil Alan</a>, <a href="https://arxiv.org/search/eess?searchtype=author&query=Coogan,+S">Samuel Coogan</a>, <a href="https://arxiv.org/search/eess?searchtype=author&query=Molnar,+T+G">Tamas G. Molnar</a>, <a href="https://arxiv.org/search/eess?searchtype=author&query=Burdick,+J+W">Joel W. Burdick</a>, <a href="https://arxiv.org/search/eess?searchtype=author&query=Majji,+M">Manoranjan Majji</a>, <a href="https://arxiv.org/search/eess?searchtype=author&query=Hobbs,+K+L">Kerianne L. Hobbs</a></div> <div class='list-comments mathjax'><span class='descriptor'>Comments:</span> Submitted to IEEE Control Systems Letters (L-CSS). 6 pages, 4 figures </div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Systems and Control (eess.SY)</span> </div> <p class='mathjax'> Designing safe controllers is crucial and notoriously challenging for input-constrained safety-critical control systems. Backup control barrier functions offer an approach for the construction of safe controllers online by considering the flow of the system under a backup controller. However, in the presence of model uncertainties, the flow cannot be accurately computed, making this method insufficient for safety assurance. To tackle this shortcoming, we integrate backup control barrier functions with a disturbance observer and estimate the flow under a reconstruction of the disturbance while refining this estimate over time. We prove that the controllers resulting from the proposed Disturbance Observer Backup Control Barrier Function (DO-bCBF) approach guarantee safety, are robust to unknown disturbances, and satisfy input constraints. 
</p> </div> </dd> <dt> <a name='item6'>[6]</a> <a href ="/abs/2503.15787" title="Abstract" id="2503.15787"> arXiv:2503.15787 </a> [<a href="/pdf/2503.15787" title="Download PDF" id="pdf-2503.15787" aria-labelledby="pdf-2503.15787">pdf</a>, <a href="https://arxiv.org/html/2503.15787v1" title="View HTML" id="html-2503.15787" aria-labelledby="html-2503.15787" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2503.15787" title="Other formats" id="oth-2503.15787" aria-labelledby="oth-2503.15787">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Enhancing Physical Layer Security in Cognitive Radio-Enabled NTNs with Beyond Diagonal RIS </div> <div class='list-authors'><a href="https://arxiv.org/search/eess?searchtype=author&query=Khan,+W+U">Wali Ullah Khan</a>, <a href="https://arxiv.org/search/eess?searchtype=author&query=Sheemar,+C+K">Chandan Kumar Sheemar</a>, <a href="https://arxiv.org/search/eess?searchtype=author&query=Lagunas,+E">Eva Lagunas</a>, <a href="https://arxiv.org/search/eess?searchtype=author&query=Chatzinotas,+S">Symeon Chatzinotas</a></div> <div class='list-comments mathjax'><span class='descriptor'>Comments:</span> 6, 4 </div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Signal Processing (eess.SP)</span>; Emerging Technologies (cs.ET); Networking and Internet Architecture (cs.NI) </div> <p class='mathjax'> Beyond diagonal reconfigurable intelligent surfaces (BD-RIS) have emerged as a transformative technology for enhancing wireless communication by intelligently manipulating the propagation environment. This paper explores the potential of BD-RIS in improving cognitive radio enabled multilayer non-terrestrial networks (NTNs). It is assumed that a high-altitude platform station (HAPS) has set up the primary network, while an uncrewed aerial vehicle (UAV) establishes the secondary network in the HAPS footprint. 
We formulate a joint optimization problem to maximize the secrecy rate by optimizing BD-RIS phase shifts and the secondary transmitter power allocation while controlling the interference temperature from the secondary network to the primary network. To solve this problem efficiently, we decouple the original problem into two sub-problems, which are solved iteratively by relying on alternating optimization. Simulation results demonstrate the effectiveness of BD-RIS in cognitive radio-enabled multilayer NTNs to accommodate the secondary network while satisfying the constraints imposed from the primary network. </p> </div> </dd> <dt> <a name='item7'>[7]</a> <a href ="/abs/2503.15823" title="Abstract" id="2503.15823"> arXiv:2503.15823 </a> [<a href="/pdf/2503.15823" title="Download PDF" id="pdf-2503.15823" aria-labelledby="pdf-2503.15823">pdf</a>, <a href="https://arxiv.org/html/2503.15823v1" title="View HTML" id="html-2503.15823" aria-labelledby="html-2503.15823" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2503.15823" title="Other formats" id="oth-2503.15823" aria-labelledby="oth-2503.15823">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> A Unified Stability Analysis of Safety-Critical Control using Multiple Control Barrier Functions </div> <div class='list-authors'><a href="https://arxiv.org/search/eess?searchtype=author&query=Reis,+M+F">Matheus F. Reis</a>, <a href="https://arxiv.org/search/eess?searchtype=author&query=Aguiar,+A+P">A. Pedro Aguiar</a></div> <div class='list-comments mathjax'><span class='descriptor'>Comments:</span> Submitted to LCSS-CDC2025. 
Under review </div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Systems and Control (eess.SY)</span> </div> <p class='mathjax'> Ensuring liveness and safety of autonomous and cyber-physical systems remains a fundamental challenge, particularly when multiple safety constraints are present. This letter advances the theoretical foundations of safety-filter Quadratic Programs (QP) and Control Lyapunov Function (CLF)-Control Barrier Function (CBF) controllers by establishing a unified analytical framework for studying their stability properties. We derive sufficient feasibility conditions for QPs with multiple CBFs and formally characterize the conditions leading to undesirable equilibrium points at possible intersecting safe set boundaries. Additionally, we introduce a stability criterion for equilibrium points, providing a systematic approach to identifying conditions under which they can be destabilized or eliminated. Our analysis extends prior theoretical results, deepening the understanding of the conditions of feasibility and stability of CBF-based safety filters and the CLF-CBF QP framework. 
</p> </div> </dd> <dt> <a name='item8'>[8]</a> <a href ="/abs/2503.15861" title="Abstract" id="2503.15861"> arXiv:2503.15861 </a> [<a href="/pdf/2503.15861" title="Download PDF" id="pdf-2503.15861" aria-labelledby="pdf-2503.15861">pdf</a>, <a href="https://arxiv.org/html/2503.15861v1" title="View HTML" id="html-2503.15861" aria-labelledby="html-2503.15861" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2503.15861" title="Other formats" id="oth-2503.15861" aria-labelledby="oth-2503.15861">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Sequential Spatial-Temporal Network for Interpretable Automatic Ultrasonic Assessment of Fetal Head during labor </div> <div class='list-authors'><a href="https://arxiv.org/search/eess?searchtype=author&query=Gan,+J">Jie Gan</a>, <a href="https://arxiv.org/search/eess?searchtype=author&query=Liang,+Z">Zhuonan Liang</a>, <a href="https://arxiv.org/search/eess?searchtype=author&query=Fan,+J">Jianan Fan</a>, <a href="https://arxiv.org/search/eess?searchtype=author&query=Mcguire,+L">Lisa Mcguire</a>, <a href="https://arxiv.org/search/eess?searchtype=author&query=Watson,+C">Caterina Watson</a>, <a href="https://arxiv.org/search/eess?searchtype=author&query=Spurway,+J">Jacqueline Spurway</a>, <a href="https://arxiv.org/search/eess?searchtype=author&query=Clarke,+J">Jillian Clarke</a>, <a href="https://arxiv.org/search/eess?searchtype=author&query=Cai,+W">Weidong Cai</a></div> <div class='list-comments mathjax'><span class='descriptor'>Comments:</span> This work has been accepted to 2025 IEEE 22nd International Symposium on Biomedical Imaging (ISBI) </div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Image and Video Processing (eess.IV)</span>; Computer Vision and Pattern Recognition (cs.CV) </div> <p class='mathjax'> The intrapartum ultrasound guideline established by ISUOG highlights the Angle of 
Progression (AoP) and Head Symphysis Distance (HSD) as pivotal metrics for assessing fetal head descent and predicting delivery outcomes. Accurate measurement of the AoP and HSD requires a structured process. This begins with identifying standardized ultrasound planes, followed by the detection of specific anatomical landmarks within the regions of the pubic symphysis and fetal head that correlate with the delivery parameters AoP and HSD. Finally, these measurements are derived based on the identified anatomical landmarks. Addressing the clinical demands and standard operation process outlined in the ISUOG guideline, we introduce the Sequential Spatial-Temporal Network (SSTN), the first interpretable model specifically designed for the video of intrapartum ultrasound analysis. The SSTN operates by first identifying ultrasound planes, then segmenting anatomical structures such as the pubic symphysis and fetal head, and finally detecting key landmarks for precise measurement of HSD and AoP. Furthermore, the cohesive framework leverages task-related information to improve accuracy and reliability. Experimental evaluations on clinical datasets demonstrate that SSTN significantly surpasses existing models, reducing the mean absolute error by 18% for AoP and 22% for HSD. 
</p> </div> </dd> <dt> <a name='item9'>[9]</a> <a href ="/abs/2503.15966" title="Abstract" id="2503.15966"> arXiv:2503.15966 </a> [<a href="/pdf/2503.15966" title="Download PDF" id="pdf-2503.15966" aria-labelledby="pdf-2503.15966">pdf</a>, <a href="https://arxiv.org/html/2503.15966v1" title="View HTML" id="html-2503.15966" aria-labelledby="html-2503.15966" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2503.15966" title="Other formats" id="oth-2503.15966" aria-labelledby="oth-2503.15966">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Privacy-Preserving Utilization of Distribution System Flexibility for Enhanced TSO-DSO Interoperability: A Novel Machine Learning-Based Optimal Power Flow Approach </div> <div class='list-authors'><a href="https://arxiv.org/search/eess?searchtype=author&query=Dindar,+B">Burak Dindar</a>, <a href="https://arxiv.org/search/eess?searchtype=author&query=Saner,+C+B">Can Berk Saner</a>, <a href="https://arxiv.org/search/eess?searchtype=author&query=%C3%87akmak,+H+K">Hüseyin K. Çakmak</a>, <a href="https://arxiv.org/search/eess?searchtype=author&query=Hagenmeyer,+V">Veit Hagenmeyer</a></div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Systems and Control (eess.SY)</span> </div> <p class='mathjax'> Due to the transformation of the power system, the effective use of flexibility from the distribution system (DS) is becoming crucial for efficient network management. Leveraging this flexibility requires interoperability among stakeholders, including Transmission System Operators (TSOs) and Distribution System Operators (DSOs). However, data privacy concerns among stakeholders present significant challenges for utilizing this flexibility effectively. 
To address these challenges, we propose a machine learning (ML)-based method in which the technical constraints of the DSs are represented by ML models trained exclusively on non-sensitive data. Using these models, the TSO can solve the optimal power flow (OPF) problem and directly determine the dispatch of flexibility-providing units (FPUs), in our case, distributed generators (DGs), in a single round of communication. To achieve this, we introduce a novel neural network (NN) architecture specifically designed to efficiently represent the feasible region of the DSs, ensuring computational effectiveness. Furthermore, we incorporate various PQ charts rather than idealized ones, demonstrating that the proposed method is adaptable to a wide range of FPU characteristics. To assess the effectiveness of the proposed method, we benchmark it against the standard AC-OPF on multiple DSs with meshed connections and multiple points of common coupling (PCCs) with varying voltage magnitudes. The numerical results indicate that the proposed method achieves performant results while prioritizing data privacy. Additionally, since this method directly determines the dispatch of FPUs, it eliminates the need for an additional disaggregation step. By representing the DSs technical constraints through ML models trained exclusively on non-sensitive data, the transfer of sensitive information between stakeholders is prevented. 
</p> </div> </dd> <dt> <a name='item10'>[10]</a> <a href ="/abs/2503.16010" title="Abstract" id="2503.16010"> arXiv:2503.16010 </a> [<a href="/pdf/2503.16010" title="Download PDF" id="pdf-2503.16010" aria-labelledby="pdf-2503.16010">pdf</a>, <a href="https://arxiv.org/html/2503.16010v1" title="View HTML" id="html-2503.16010" aria-labelledby="html-2503.16010" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2503.16010" title="Other formats" id="oth-2503.16010" aria-labelledby="oth-2503.16010">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Patch-based learning of adaptive Total Variation parameter maps for blind image denoising </div> <div class='list-authors'><a href="https://arxiv.org/search/eess?searchtype=author&query=Fantasia,+C">Claudio Fantasia</a>, <a href="https://arxiv.org/search/eess?searchtype=author&query=Calatroni,+L">Luca Calatroni</a>, <a href="https://arxiv.org/search/eess?searchtype=author&query=Descombes,+X">Xavier Descombes</a>, <a href="https://arxiv.org/search/eess?searchtype=author&query=Rekik,+R">Rim Rekik</a></div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Image and Video Processing (eess.IV)</span>; Machine Learning (cs.LG); Numerical Analysis (math.NA) </div> <p class='mathjax'> We consider a patch-based learning approach defined in terms of neural networks to estimate spatially adaptive regularisation parameter maps for image denoising with weighted Total Variation and test it to situations when the noise distribution is unknown. As an example, we consider situations where noise could be either Gaussian or Poisson and perform preliminary model selection by a standard binary classification network. 
Then, we define a patch-based approach where at each image pixel an optimal weighting between TV regularisation and the corresponding data fidelity is learned in a supervised way using reference natural image patches upon optimisation of SSIM and in a sliding window fashion. Extensive numerical results are reported for both noise models, showing significant improvement w.r.t. results obtained by means of optimal scalar regularisation. </p> </div> </dd> <dt> <a name='item11'>[11]</a> <a href ="/abs/2503.16055" title="Abstract" id="2503.16055"> arXiv:2503.16055 </a> [<a href="/pdf/2503.16055" title="Download PDF" id="pdf-2503.16055" aria-labelledby="pdf-2503.16055">pdf</a>, <a href="https://arxiv.org/html/2503.16055v1" title="View HTML" id="html-2503.16055" aria-labelledby="html-2503.16055" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2503.16055" title="Other formats" id="oth-2503.16055" aria-labelledby="oth-2503.16055">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> SALT: Singular Value Adaptation with Low-Rank Transformation </div> <div class='list-authors'><a href="https://arxiv.org/search/eess?searchtype=author&query=Elsayed,+A">Abdelrahman Elsayed</a>, <a href="https://arxiv.org/search/eess?searchtype=author&query=Hashmi,+S">Sarim Hashmi</a>, <a href="https://arxiv.org/search/eess?searchtype=author&query=Elseiagy,+M">Mohammed Elseiagy</a>, <a href="https://arxiv.org/search/eess?searchtype=author&query=Wang,+H">Hu Wang</a>, <a href="https://arxiv.org/search/eess?searchtype=author&query=Yaqub,+M">Mohammad Yaqub</a>, <a href="https://arxiv.org/search/eess?searchtype=author&query=Almakky,+I">Ibrahim Almakky</a></div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Image and Video Processing (eess.IV)</span>; Computer Vision and Pattern Recognition (cs.CV) </div> <p class='mathjax'> The complex nature of medical image segmentation 
calls for models that are specifically designed to capture detailed, domain-specific features. Large foundation models offer considerable flexibility, yet the cost of fine-tuning these models remains a significant barrier. Parameter-Efficient Fine-Tuning (PEFT) methods, such as Low-Rank Adaptation (LoRA), efficiently update model weights with low-rank matrices but may suffer from underfitting when the chosen rank is insufficient to capture domain-specific nuances. Conversely, full-rank Singular Value Decomposition (SVD) based methods provide comprehensive updates by modifying all singular values, yet they often lack flexibility and exhibit variable performance across datasets. We propose SALT (Singular Value Adaptation with Low-Rank Transformation), a method that selectively adapts the most influential singular values using trainable scale and shift parameters while complementing this with a low-rank update for the remaining subspace. This hybrid approach harnesses the advantages of both LoRA and SVD, enabling effective adaptation without relying on increasing model size or depth. Evaluated on 5 challenging medical datasets, ranging from as few as 20 samples to 1000, SALT outperforms state-of-the-art PEFT (LoRA and SVD) by 2% to 5% in Dice with only 3.9% trainable parameters, demonstrating robust adaptation even in low-resource settings. 
The code for SALT is available at: <a href="https://github.com/BioMedIA-MBZUAI/SALT" rel="external noopener nofollow" class="link-external link-https">this https URL</a> </p> </div> </dd> <dt> <a name='item12'>[12]</a> <a href ="/abs/2503.16075" title="Abstract" id="2503.16075"> arXiv:2503.16075 </a> [<a href="/pdf/2503.16075" title="Download PDF" id="pdf-2503.16075" aria-labelledby="pdf-2503.16075">pdf</a>, <a href="https://arxiv.org/html/2503.16075v1" title="View HTML" id="html-2503.16075" aria-labelledby="html-2503.16075" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2503.16075" title="Other formats" id="oth-2503.16075" aria-labelledby="oth-2503.16075">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> 3-D Image-to-Image Fusion in Lightsheet Microscopy by Two-Step Adversarial Network: Contribution to the FuseMyCells Challenge </div> <div class='list-authors'><a href="https://arxiv.org/search/eess?searchtype=author&query=Wodzinski,+M">Marek Wodzinski</a>, <a href="https://arxiv.org/search/eess?searchtype=author&query=M%C3%BCller,+H">Henning Müller</a></div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Image and Video Processing (eess.IV)</span>; Artificial Intelligence (cs.AI); Computer Vision and Pattern Recognition (cs.CV) </div> <p class='mathjax'> Lightsheet microscopy is a powerful 3-D imaging technique that addresses limitations of traditional optical and confocal microscopy but suffers from a low penetration depth and reduced image quality at greater depths. Multiview lightsheet microscopy improves 3-D resolution by combining multiple views but simultaneously increasing the complexity and the photon budget, leading to potential photobleaching and phototoxicity. 
The FuseMyCells challenge, organized in conjunction with the IEEE ISBI 2025 conference, aims to benchmark deep learning-based solutions for fusing high-quality 3-D volumes from single 3-D views, potentially simplifying procedures and conserving the photon budget. In this work, we propose a contribution to the FuseMyCells challenge based on a two-step procedure. The first step processes a downsampled version of the image to capture the entire region of interest, while the second step uses a patch-based approach for high-resolution inference, incorporating adversarial loss to enhance visual outcomes. This method addresses challenges related to high data resolution, the necessity of global context, and the preservation of high-frequency details. Experimental results demonstrate the effectiveness of our approach, highlighting its potential to improve 3-D image fusion quality and extend the capabilities of lightsheet microscopy. The average SSIM for the nucleus and membranes is greater than 0.85 and 0.91, respectively. 
</p> </div> </dd> <dt> <a name='item13'>[13]</a> <a href ="/abs/2503.16084" title="Abstract" id="2503.16084"> arXiv:2503.16084 </a> [<a href="/pdf/2503.16084" title="Download PDF" id="pdf-2503.16084" aria-labelledby="pdf-2503.16084">pdf</a>, <a href="https://arxiv.org/html/2503.16084v1" title="View HTML" id="html-2503.16084" aria-labelledby="html-2503.16084" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2503.16084" title="Other formats" id="oth-2503.16084" aria-labelledby="oth-2503.16084">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Age of Information in Multi-Relay Networks with Maximum Age Scheduling </div> <div class='list-authors'><a href="https://arxiv.org/search/eess?searchtype=author&query=de+Jesus,+G+M">Gabriel Martins de Jesus</a>, <a href="https://arxiv.org/search/eess?searchtype=author&query=Pereira,+F+M">Felippe Moraes Pereira</a>, <a href="https://arxiv.org/search/eess?searchtype=author&query=Rebelatto,+J+L">João Luiz Rebelatto</a>, <a href="https://arxiv.org/search/eess?searchtype=author&query=Souza,+R+D">Richard Demo Souza</a>, <a href="https://arxiv.org/search/eess?searchtype=author&query=L%C3%B3pez,+O+A">Onel Alcaraz López</a></div> <div class='list-comments mathjax'><span class='descriptor'>Comments:</span> 13 pages, 11 figures. This paper is under review for possible publication </div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Signal Processing (eess.SP)</span> </div> <p class='mathjax'> We propose and evaluate age of information (AoI)-aware multiple access mechanisms for the Internet of Things (IoT) in multi-relay two-hop networks. The network considered comprises end devices (EDs) communicating with a set of relays in ALOHA fashion, with new information packets to be potentially transmitted every time slot. 
The relays, in turn, forward the collected packets to an access point (AP), the final destination of the information generated by the EDs. More specifically, in this work we investigate the performance of four age-aware algorithms that prioritize older packets to be transmitted, namely max-age matching (MAM), iterative max-age scheduling (IMAS), age-based delayed request (ABDR), and buffered ABDR (B-ABDR). The former two algorithms are adapted into the multi-relay setup from previous research, and achieve satisfactory average AoI and average peak AoI performance, at the expense of a significant amount of information exchange between the relays and the AP. The latter two algorithms are newly proposed to let relays decide which one(s) will transmit in a given time slot, requiring less signaling than the former algorithms. We provide an analytical formulation for the AoI lower bound performance, compare the performance of all algorithms in this set-up, and show that they approach the lower bound. The latter holds especially true for B-ABDR, which approaches the lower bound the most closely, tilting the scale in its favor, as it also requires far less signaling than MAM and IMAS. 
</p> </div> </dd> <dt> <a name='item14'>[14]</a> <a href ="/abs/2503.16139" title="Abstract" id="2503.16139"> arXiv:2503.16139 </a> [<a href="/pdf/2503.16139" title="Download PDF" id="pdf-2503.16139" aria-labelledby="pdf-2503.16139">pdf</a>, <a href="https://arxiv.org/html/2503.16139v1" title="View HTML" id="html-2503.16139" aria-labelledby="html-2503.16139" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2503.16139" title="Other formats" id="oth-2503.16139" aria-labelledby="oth-2503.16139">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Aging-aware Energy Management for Residential Multi-Carrier Energy Systems </div> <div class='list-authors'><a href="https://arxiv.org/search/eess?searchtype=author&query=Slaifstein,+D">Darío Slaifstein</a> (1), <a href="https://arxiv.org/search/eess?searchtype=author&query=Mouli,+G+R+C">Gautham Ram Chandra Mouli</a> (1), <a href="https://arxiv.org/search/eess?searchtype=author&query=Ramirez-Elizondo,+L">Laura Ramirez-Elizondo</a> (1), <a href="https://arxiv.org/search/eess?searchtype=author&query=Bauer,+P">Pavol Bauer</a> (1) ((1) Delft University of Technology)</div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Systems and Control (eess.SY)</span>; Optimization and Control (math.OC) </div> <p class='mathjax'> In the context of building electrification, the operation of distributed energy resources integrating multiple energy carriers (electricity, heat, mobility) poses a significant challenge. Such an operation calls for an energy management system that decides the set points of the primary control layer in the best way possible. This has to fulfill user requirements, minimize costs, and balance local generation with energy storage. Such storage enables building flexibility. This paper presents a novel aging-aware strategy for electrified buildings. 
The energy management algorithm presented incorporates physics-based battery aging models to enhance the operational performance, making explicit the trade-off between the grid cost and battery degradation. The proposed algorithm can be used to improve grid costs or to protect the batteries (static or electric vehicles). This energy management algorithm can control different cathode chemistries as well as aged and fresh batteries, improving costs with respect to benchmarks for these cases. </p> </div> </dd> <dt> <a name='item15'>[15]</a> <a href ="/abs/2503.16149" title="Abstract" id="2503.16149"> arXiv:2503.16149 </a> [<a href="/pdf/2503.16149" title="Download PDF" id="pdf-2503.16149" aria-labelledby="pdf-2503.16149">pdf</a>, <a href="https://arxiv.org/html/2503.16149v1" title="View HTML" id="html-2503.16149" aria-labelledby="html-2503.16149" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2503.16149" title="Other formats" id="oth-2503.16149" aria-labelledby="oth-2503.16149">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Selective Complementary Feature Fusion and Modal Feature Compression Interaction for Brain Tumor Segmentation </div> <div class='list-authors'><a href="https://arxiv.org/search/eess?searchtype=author&query=Chen,+D">Dong Chen</a>, <a href="https://arxiv.org/search/eess?searchtype=author&query=Zhao,+B">Boyue Zhao</a>, <a href="https://arxiv.org/search/eess?searchtype=author&query=Zhang,+Y">Yi Zhang</a>, <a href="https://arxiv.org/search/eess?searchtype=author&query=Zhao,+M">Meng Zhao</a></div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Image and Video Processing (eess.IV)</span>; Computer Vision and Pattern Recognition (cs.CV) </div> <p class='mathjax'> Efficient modal feature fusion strategy is the key to achieve accurate segmentation of brain glioma. 
However, due to the specificity of different MRI modes, it is difficult to carry out cross-modal fusion with large differences in modal features, resulting in the model ignoring rich feature information. On the other hand, the problem of multi-modal feature redundancy interaction occurs in parallel networks due to the proliferation of feature dimensions, further increasing the difficulty of multi-modal feature fusion at the bottom end. In order to solve the above problems, we propose a novel complementary feature compression interaction network (CFCI-Net), which realizes the complementary fusion and compression interaction of multi-modal feature information with an efficient mode fusion strategy. Firstly, we propose a selective complementary feature fusion (SCFF) module, which adaptively fuses rich cross-modal feature information by complementary soft selection weights. Secondly, a modal feature compression interaction (MFCI) transformer is proposed to deal with the multi-mode fusion redundancy problem when the feature dimension surges. The MFCI transformer is composed of modal feature compression (MFC) and modal feature interaction (MFI) to realize redundancy feature compression and multi-mode feature interactive learning. %In MFI, we propose a hierarchical interactive attention mechanism based on multi-head attention. Evaluations on the BraTS2019 and BraTS2020 datasets demonstrate that CFCI-Net achieves superior results compared to state-of-the-art models. 
Code: <a href="https://github.com/CDmm0/CFCI-Net" rel="external noopener nofollow" class="link-external link-https">this https URL</a> </p> </div> </dd> <dt> <a name='item16'>[16]</a> <a href ="/abs/2503.16169" title="Abstract" id="2503.16169"> arXiv:2503.16169 </a> [<a href="/pdf/2503.16169" title="Download PDF" id="pdf-2503.16169" aria-labelledby="pdf-2503.16169">pdf</a>, <a href="/format/2503.16169" title="Other formats" id="oth-2503.16169" aria-labelledby="oth-2503.16169">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Learning Linear Block Codes with Gradient Quantization </div> <div class='list-authors'><a href="https://arxiv.org/search/eess?searchtype=author&query=Dufr%C3%A8ne,+L">Louis-Adrien Dufrène</a>, <a href="https://arxiv.org/search/eess?searchtype=author&query=Lampin,+Q">Quentin Lampin</a>, <a href="https://arxiv.org/search/eess?searchtype=author&query=Larue,+G">Guillaume Larue</a></div> <div class='list-comments mathjax'><span class='descriptor'>Comments:</span> 13 pages </div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Signal Processing (eess.SP)</span> </div> <p class='mathjax'> This study investigates the problem of learning linear block codes optimized for Belief-Propagation decoders significantly improving performance compared to the state-of-the-art. Our previous research is extended with an enhanced system design that facilitates a more effective learning process for the parity check matrix. We simplify the input dataset, restrict the number of parameters to learn and improve the gradient back-propagation within the model. We also introduce novel optimizers specifically designed for discrete-valued weights. Based on conventional gradient computation, these optimizers provide discrete weights updates, enabling finer control and improving explainability of the learning process. 
Through these changes, we consistently achieve improved code performance, provided appropriately chosen hyper-parameters. To rigorously evaluate the performance of learned codes in the context of short to medium block lengths, we propose a comprehensive code performance assessment framework. This framework enables a fair comparison between our learning methodology and random search approaches, ensuring statistical significance in our results. The proposed model paves the way for a new approach to the efficient learning of linear block codes tailored to specific decoder structures. </p> </div> </dd> <dt> <a name='item17'>[17]</a> <a href ="/abs/2503.16235" title="Abstract" id="2503.16235"> arXiv:2503.16235 </a> [<a href="/pdf/2503.16235" title="Download PDF" id="pdf-2503.16235" aria-labelledby="pdf-2503.16235">pdf</a>, <a href="/format/2503.16235" title="Other formats" id="oth-2503.16235" aria-labelledby="oth-2503.16235">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> A Unifying Complexity-Certification Framework for Branch-and-Bound Algorithms for Mixed-Integer Linear and Quadratic Programming </div> <div class='list-authors'><a href="https://arxiv.org/search/eess?searchtype=author&query=Shoja,+S">Shamisa Shoja</a>, <a href="https://arxiv.org/search/eess?searchtype=author&query=Arnstr%C3%B6m,+D">Daniel Arnström</a>, <a href="https://arxiv.org/search/eess?searchtype=author&query=Axehill,+D">Daniel Axehill</a></div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Systems and Control (eess.SY)</span> </div> <p class='mathjax'> In model predictive control (MPC) for hybrid systems, solving optimization problems efficiently and with guarantees on worst-case computational complexity is critical, particularly in real-time applications. 
These optimization problems often take the form of mixed-integer linear programs (MILPs) or mixed-integer quadratic programs (MIQPs) that depend on system parameters. A common approach for solving such problems is the branch-and-bound (B&B) method. This paper extends existing complexity certification methods by presenting a unified complexity-certification framework for B&B-based MILP and MIQP solvers, specifically for the family of multi-parametric MILP and MIQP problems that arise in, e.g., hybrid MPC applications. The framework provides guarantees on worst-case computational measures, including the maximum number of iterations or relaxations B&B algorithms require to reach optimality. It systematically accounts for different branching and node selection strategies, as well as heuristics integrated into B&B, ensuring a comprehensive certification framework. By offering theoretical guarantees and practical insights for solver customization, the proposed framework enhances the reliability of B&B for real-time application. The usefulness of the proposed framework is demonstrated through numerical experiments on both random MILPs and MIQPs, as well as on MIQPs arising from a hybrid MPC problem. 
</p> </div> </dd> <dt> <a name='item18'>[18]</a> <a href ="/abs/2503.16236" title="Abstract" id="2503.16236"> arXiv:2503.16236 </a> [<a href="/pdf/2503.16236" title="Download PDF" id="pdf-2503.16236" aria-labelledby="pdf-2503.16236">pdf</a>, <a href="https://arxiv.org/html/2503.16236v1" title="View HTML" id="html-2503.16236" aria-labelledby="html-2503.16236" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2503.16236" title="Other formats" id="oth-2503.16236" aria-labelledby="oth-2503.16236">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Distributed Algorithm for Cooperative Joint Localization and Tracking Using Multiple-Input Multiple-Output Radars </div> <div class='list-authors'><a href="https://arxiv.org/search/eess?searchtype=author&query=Kitchen,+A+H+F">Astrid Holm Filtenborg Kitchen</a>, <a href="https://arxiv.org/search/eess?searchtype=author&query=Br%C3%B8ndt,+M+S+L">Mikkel Sebastian Lundsgaard Brøndt</a>, <a href="https://arxiv.org/search/eess?searchtype=author&query=Jensen,+M+S">Marie Saugstrup Jensen</a>, <a href="https://arxiv.org/search/eess?searchtype=author&query=Pedersen,+T">Troels Pedersen</a>, <a href="https://arxiv.org/search/eess?searchtype=author&query=Westerkam,+A+M">Anders Malthe Westerkam</a></div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Signal Processing (eess.SP)</span> </div> <p class='mathjax'> We propose a distributed joint localization and tracking algorithm using a message passing framework, for multiple-input multiple-output radars. We employ the mean field approach to derive an iterative algorithm. The obtained algorithm features a small communication overhead that scales linearly with the number of radars in the system. The proposed algorithm shows good estimation accuracy in two simulated scenarios even below 0 dB signal to noise ratio. 
In both cases the ground truth falls within the 95 % confidence interval of the estimated posterior for the majority of the track. </p> </div> </dd> <dt> <a name='item19'>[19]</a> <a href ="/abs/2503.16264" title="Abstract" id="2503.16264"> arXiv:2503.16264 </a> [<a href="/pdf/2503.16264" title="Download PDF" id="pdf-2503.16264" aria-labelledby="pdf-2503.16264">pdf</a>, <a href="https://arxiv.org/html/2503.16264v1" title="View HTML" id="html-2503.16264" aria-labelledby="html-2503.16264" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2503.16264" title="Other formats" id="oth-2503.16264" aria-labelledby="oth-2503.16264">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Do image and video quality metrics model low-level human vision? </div> <div class='list-authors'><a href="https://arxiv.org/search/eess?searchtype=author&query=Hammou,+D">Dounia Hammou</a>, <a href="https://arxiv.org/search/eess?searchtype=author&query=Cai,+Y">Yancheng Cai</a>, <a href="https://arxiv.org/search/eess?searchtype=author&query=Madhusudanarao,+P">Pavan Madhusudanarao</a>, <a href="https://arxiv.org/search/eess?searchtype=author&query=Bampis,+C+G">Christos G. Bampis</a>, <a href="https://arxiv.org/search/eess?searchtype=author&query=Mantiuk,+R+K">Rafał K. Mantiuk</a></div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Image and Video Processing (eess.IV)</span>; Computer Vision and Pattern Recognition (cs.CV); Multimedia (cs.MM) </div> <p class='mathjax'> Image and video quality metrics, such as SSIM, LPIPS, and VMAF, are aimed to predict the perceived quality of the evaluated content and are often claimed to be "perceptual". Yet, few metrics directly model human visual perception, and most rely on hand-crafted formulas or training datasets to achieve alignment with perceptual data. 
In this paper, we propose a set of tests for full-reference quality metrics that examine their ability to model several aspects of low-level human vision: contrast sensitivity, contrast masking, and contrast matching. The tests are meant to provide additional scrutiny for newly proposed metrics. We use our tests to analyze 33 existing image and video quality metrics and find their strengths and weaknesses, such as the ability of LPIPS and MS-SSIM to predict contrast masking and poor performance of VMAF in this task. We further find that the popular SSIM metric overemphasizes differences in high spatial frequencies, but its multi-scale counterpart, MS-SSIM, addresses this shortcoming. Such findings cannot be easily made using existing evaluation protocols. </p> </div> </dd> <dt> <a name='item20'>[20]</a> <a href ="/abs/2503.16279" title="Abstract" id="2503.16279"> arXiv:2503.16279 </a> [<a href="/pdf/2503.16279" title="Download PDF" id="pdf-2503.16279" aria-labelledby="pdf-2503.16279">pdf</a>, <a href="https://arxiv.org/html/2503.16279v1" title="View HTML" id="html-2503.16279" aria-labelledby="html-2503.16279" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2503.16279" title="Other formats" id="oth-2503.16279" aria-labelledby="oth-2503.16279">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Evaluation of Torque Ripple and Tooth Forces of a Skewed PMSM by 2D and 3D FE Simulations </div> <div class='list-authors'><a href="https://arxiv.org/search/eess?searchtype=author&query=M%C3%BCller,+K">Karsten Müller</a>, <a href="https://arxiv.org/search/eess?searchtype=author&query=Wanke,+A">Andreas Wanke</a>, <a href="https://arxiv.org/search/eess?searchtype=author&query=Burkhardt,+Y">Yves Burkhardt</a>, <a href="https://arxiv.org/search/eess?searchtype=author&query=De+Gersem,+H">Herbert De Gersem</a></div> <div class='list-journal-ref'><span class='descriptor'>Journal-ref:</span> Scientific 
Computing in Electrical Engineering SCEE 2024 </div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Systems and Control (eess.SY)</span> </div> <p class='mathjax'> In this paper, various skewing configurations for a permanent magnet synchronous machine are evaluated by comparing torque ripple amplitudes and tooth forces. Since high-frequency pure tones emitted by an electrical machine significantly impact a vehicle's noise, vibration, and harshness (NVH) behavior, it is crucial to analyze radial forces. These forces are examined and compared across different skewing configurations and angles using the Maxwell stress tensor in 2D and 3D finite-element (FE) simulations. In addition to conventional investigations in 2D FE simulations, 3D FE simulations are executed. These 3D FE simulations show that axial forces occur at the transition points between the magnetic segments of a linear step skewed rotor. </p> </div> </dd> <dt> <a name='item21'>[21]</a> <a href ="/abs/2503.16288" title="Abstract" id="2503.16288"> arXiv:2503.16288 </a> [<a href="/pdf/2503.16288" title="Download PDF" id="pdf-2503.16288" aria-labelledby="pdf-2503.16288">pdf</a>, <a href="https://arxiv.org/html/2503.16288v1" title="View HTML" id="html-2503.16288" aria-labelledby="html-2503.16288" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2503.16288" title="Other formats" id="oth-2503.16288" aria-labelledby="oth-2503.16288">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Overview of Variable Rate Coding in JPEG AI </div> <div class='list-authors'><a href="https://arxiv.org/search/eess?searchtype=author&query=Jia,+P">Panqi Jia</a>, <a href="https://arxiv.org/search/eess?searchtype=author&query=Brand,+F">Fabian Brand</a>, <a href="https://arxiv.org/search/eess?searchtype=author&query=Yu,+D">Dequan Yu</a>, <a 
href="https://arxiv.org/search/eess?searchtype=author&query=Karabutov,+A">Alexander Karabutov</a>, <a href="https://arxiv.org/search/eess?searchtype=author&query=Alshina,+E">Elena Alshina</a>, <a href="https://arxiv.org/search/eess?searchtype=author&query=Kaup,+A">Andre Kaup</a></div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Image and Video Processing (eess.IV)</span> </div> <p class='mathjax'> Empirical evidence has demonstrated that learning-based image compression can outperform classical compression frameworks. This has led to the ongoing standardization of learned-based image codecs, namely Joint Photographic Experts Group (JPEG) AI. The objective of JPEG AI is to enhance compression efficiency and provide a software and hardware-friendly solution. Based on our research, JPEG AI represents the first standardization that can facilitate the implementation of a learned image codec on a mobile device. This article presents an overview of the variable rate coding functionality in JPEG AI, which includes three variable rate adaptations: a three-dimensional quality map, a fast bit rate matching algorithm, and a training strategy. The variable rate adaptations offer a continuous rate function up to 2.0 bpp, exhibiting a high level of performance, a flexible bit allocation between different color components, and a region of interest function for the specified use case. The evaluation of performance encompasses both objective and subjective results. With regard to the objective bit rate matching, the main profile with low complexity yielded a 13.1% BD-rate gain over VVC intra, while the high profile with high complexity achieved a 19.2% BD-rate gain over VVC intra. The BD-rate result is calculated as the mean of the seven perceptual metrics defined in the JPEG AI common test conditions. With respect to subjective results, the example of improving the quality of the region of interest is illustrated. 
</p> </div> </dd> <dt> <a name='item22'>[22]</a> <a href ="/abs/2503.16298" title="Abstract" id="2503.16298"> arXiv:2503.16298 </a> [<a href="/pdf/2503.16298" title="Download PDF" id="pdf-2503.16298" aria-labelledby="pdf-2503.16298">pdf</a>, <a href="https://arxiv.org/html/2503.16298v1" title="View HTML" id="html-2503.16298" aria-labelledby="html-2503.16298" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2503.16298" title="Other formats" id="oth-2503.16298" aria-labelledby="oth-2503.16298">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Hyperspectral Unmixing using Iterative, Sparse and Ensambling Approaches for Large Spectral Libraries Applied to Soils and Minerals </div> <div class='list-authors'><a href="https://arxiv.org/search/eess?searchtype=author&query=Preston,+J">Jade Preston</a>, <a href="https://arxiv.org/search/eess?searchtype=author&query=Basener,+W">William Basener</a></div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Image and Video Processing (eess.IV)</span>; Data Analysis, Statistics and Probability (physics.data-an) </div> <p class='mathjax'> Unmixing is a fundamental process in hyperspectral image processing in which the materials present in a mixed pixel are determined based on the spectra of candidate materials and the pixel spectrum. Practical and general utility requires a large spectral library with sample measurements covering the full variation in each candidate material as well as a sufficiently varied collection of potential materials. However, any spectral library with more spectra than bands will lead to an ill-posed inversion problem when using classical least-squares regression-based unmixing methods. Moreover, for numerical and dimensionality reasons, libraries with over 10 or 20 spectra behave computationally as though they are ill-posed. 
In current practice, unmixing is often applied to imagery using manually-selected materials or image endmembers. General unmixing of a spectrum from an unknown material with a large spectral library requires some form of sparse regression; regression where only a small number of coefficients are nonzero. This requires a trade-off between goodness-of-fit and model size. In this study we compare variations of two sparse regression techniques, focusing on the relationship between structure and chemistry of materials and the accuracy of the various models for identifying the correct mixture of materials present. Specifically, we examine LASSO regression and ElasticNet in contrast with variations of iterative feature selection, Bayesian Model Averaging (BMA), and quadratic BMA (BMA-Q) -- incorporating LASSO regression and ElasticNet as their base model. To evaluate the effectiveness of these methods, we consider the molecular composition similarities and differences of substances selected in the models compared to the ground truth. 
</p> </div> </dd> <dt> <a name='item23'>[23]</a> <a href ="/abs/2503.16309" title="Abstract" id="2503.16309"> arXiv:2503.16309 </a> [<a href="/pdf/2503.16309" title="Download PDF" id="pdf-2503.16309" aria-labelledby="pdf-2503.16309">pdf</a>, <a href="https://arxiv.org/html/2503.16309v1" title="View HTML" id="html-2503.16309" aria-labelledby="html-2503.16309" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2503.16309" title="Other formats" id="oth-2503.16309" aria-labelledby="oth-2503.16309">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Rapid patient-specific neural networks for intraoperative X-ray to volume registration </div> <div class='list-authors'><a href="https://arxiv.org/search/eess?searchtype=author&query=Gopalakrishnan,+V">Vivek Gopalakrishnan</a>, <a href="https://arxiv.org/search/eess?searchtype=author&query=Dey,+N">Neel Dey</a>, <a href="https://arxiv.org/search/eess?searchtype=author&query=Chlorogiannis,+D">David-Dimitris Chlorogiannis</a>, <a href="https://arxiv.org/search/eess?searchtype=author&query=Abumoussa,+A">Andrew Abumoussa</a>, <a href="https://arxiv.org/search/eess?searchtype=author&query=Larson,+A+M">Anna M. Larson</a>, <a href="https://arxiv.org/search/eess?searchtype=author&query=Orbach,+D+B">Darren B. 
Orbach</a>, <a href="https://arxiv.org/search/eess?searchtype=author&query=Frisken,+S">Sarah Frisken</a>, <a href="https://arxiv.org/search/eess?searchtype=author&query=Golland,+P">Polina Golland</a></div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Image and Video Processing (eess.IV)</span>; Computer Vision and Pattern Recognition (cs.CV); Medical Physics (physics.med-ph) </div> <p class='mathjax'> The integration of artificial intelligence in image-guided interventions holds transformative potential, promising to extract 3D geometric and quantitative information from conventional 2D imaging modalities during complex procedures. Achieving this requires the rapid and precise alignment of 2D intraoperative images (e.g., X-ray) with 3D preoperative volumes (e.g., CT, MRI). However, current 2D/3D registration methods fail across the broad spectrum of procedures dependent on X-ray guidance: traditional optimization techniques require custom parameter tuning for each subject, whereas neural networks trained on small datasets do not generalize to new patients or require labor-intensive manual annotations, increasing clinical burden and precluding application to new anatomical targets. To address these challenges, we present xvr, a fully automated framework for training patient-specific neural networks for 2D/3D registration. xvr uses physics-based simulation to generate abundant high-quality training data from a patient's own preoperative volumetric imaging, thereby overcoming the inherently limited ability of supervised models to generalize to new patients and procedures. Furthermore, xvr requires only 5 minutes of training per patient, making it suitable for emergency interventions as well as planned procedures. 
We perform the largest evaluation of a 2D/3D registration algorithm on real X-ray data to date and find that xvr robustly generalizes across a diverse dataset comprising multiple anatomical structures, imaging modalities, and hospitals. Across surgical tasks, xvr achieves submillimeter-accurate registration at intraoperative speeds, improving upon existing methods by an order of magnitude. xvr is released as open-source software freely available at <a href="https://github.com/eigenvivek/xvr" rel="external noopener nofollow" class="link-external link-https">this https URL</a>. </p> </div> </dd> <dt> <a name='item24'>[24]</a> <a href ="/abs/2503.16331" title="Abstract" id="2503.16331"> arXiv:2503.16331 </a> [<a href="/pdf/2503.16331" title="Download PDF" id="pdf-2503.16331" aria-labelledby="pdf-2503.16331">pdf</a>, <a href="https://arxiv.org/html/2503.16331v1" title="View HTML" id="html-2503.16331" aria-labelledby="html-2503.16331" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2503.16331" title="Other formats" id="oth-2503.16331" aria-labelledby="oth-2503.16331">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Finite Sample Analysis of System Poles for Ho-Kalman Algorithm </div> <div class='list-authors'><a href="https://arxiv.org/search/eess?searchtype=author&query=Sun,+S">Shuai Sun</a></div> <div class='list-comments mathjax'><span class='descriptor'>Comments:</span> 17 pages, 1 figure </div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Systems and Control (eess.SY)</span> </div> <p class='mathjax'> This paper investigates the error analysis of system pole estimation in $n$-dimensional discrete-time Linear Time-Invariant systems with $m$ outputs and $p$ inputs, using the classical Ho-Kalman algorithm based on finite input-output sample data. 
Building upon prior work, we establish end-to-end estimation guarantees for system poles under both single-trajectory and multiple-trajectory settings. Specifically, we prove that, with high probability, the estimation error of system poles decreases at a rate of at least $\mathcal{O}\{T^{-\frac{1}{2n}}\}$ in the single-trajectory case and $\mathcal{O}\{N^{-\frac{1}{2n}}\}$ in the multiple-trajectory case, where $T$ is the length of a single trajectory, and $N$ is the number of trajectories. Furthermore, we reveal that in both settings, achieving a constant estimation accuracy for system poles requires the sample size to grow super-polynomially with respect to the larger of the two ratios, $ \max\{n/m, n/p\} $. Numerical experiments are conducted to validate the non-asymptotic results of system pole estimation. </p> </div> </dd> <dt> <a name='item25'>[25]</a> <a href ="/abs/2503.16389" title="Abstract" id="2503.16389"> arXiv:2503.16389 </a> [<a href="/pdf/2503.16389" title="Download PDF" id="pdf-2503.16389" aria-labelledby="pdf-2503.16389">pdf</a>, <a href="https://arxiv.org/html/2503.16389v1" title="View HTML" id="html-2503.16389" aria-labelledby="html-2503.16389" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2503.16389" title="Other formats" id="oth-2503.16389" aria-labelledby="oth-2503.16389">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Attentional Triple-Encoder Network in Spatiospectral Domains for Medical Image Segmentation </div> <div class='list-authors'><a href="https://arxiv.org/search/eess?searchtype=author&query=Qi,+K">Kristin Qi</a>, <a href="https://arxiv.org/search/eess?searchtype=author&query=Di,+X">Xinhan Di</a></div> <div class='list-comments mathjax'><span class='descriptor'>Comments:</span> IEEE Conference on Artificial Intelligence (IEEE CAI) </div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Image and 
Video Processing (eess.IV)</span>; Artificial Intelligence (cs.AI); Computer Vision and Pattern Recognition (cs.CV) </div> <p class='mathjax'> Retinal Optical Coherence Tomography (OCT) segmentation is essential for diagnosing pathology. Traditional methods focus on either spatial or spectral domains, overlooking their combined dependencies. We propose a triple-encoder network that integrates CNNs for spatial features, Fast Fourier Convolution (FFC) for spectral features, and attention mechanisms to capture global relationships across both domains. Attention fusion modules integrate convolution and cross-attention to further enhance features. Our method achieves an average Dice score improvement from 0.855 to 0.864, outperforming prior work. </p> </div> </dd> <dt> <a name='item26'>[26]</a> <a href ="/abs/2503.16411" title="Abstract" id="2503.16411"> arXiv:2503.16411 </a> [<a href="/pdf/2503.16411" title="Download PDF" id="pdf-2503.16411" aria-labelledby="pdf-2503.16411">pdf</a>, <a href="https://arxiv.org/html/2503.16411v1" title="View HTML" id="html-2503.16411" aria-labelledby="html-2503.16411" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2503.16411" title="Other formats" id="oth-2503.16411" aria-labelledby="oth-2503.16411">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Parallel Domain-Decomposition Algorithms for Complexity Certification of Branch-and-Bound Algorithms for Mixed-Integer Linear and Quadratic Programming </div> <div class='list-authors'><a href="https://arxiv.org/search/eess?searchtype=author&query=Shoja,+S">Shamisa Shoja</a>, <a href="https://arxiv.org/search/eess?searchtype=author&query=Arnstr%C3%B6m,+D">Daniel Arnström</a>, <a href="https://arxiv.org/search/eess?searchtype=author&query=Axehill,+D">Daniel Axehill</a></div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Systems and Control (eess.SY)</span> 
</div> <p class='mathjax'> When implementing model predictive control (MPC) for hybrid systems with a linear or a quadratic performance measure, a mixed-integer linear program (MILP) or a mixed-integer quadratic program (MIQP) needs to be solved, respectively, at each sampling instant. Recent work has introduced the possibility to certify the computational complexity of branch-and-bound (B&B) algorithms when solving MILP and MIQP problems formulated as multi-parametric MILPs (mp-MILPs) and mp-MIQPs. Such a framework allows for computing the worst-case computational complexity of standard B&B-based MILP and MIQP solvers, quantified by metrics such as the total number of LP/QP iterations and B&B nodes. These results are highly relevant for real-time hybrid MPC applications. In this paper, we extend this framework by developing parallel, domain-decomposition versions of the previously proposed algorithm, allowing it to scale to larger problem sizes and enable the use of high-performance computing (HPC) resources. Furthermore, to reduce peak memory consumption, we introduce two modifications to the existing (serial) complexity certification framework, integrating them into the proposed parallel algorithms. Numerical experiments show that the parallel algorithms significantly reduce computation time while maintaining the correctness of the original framework. 
</p> </div> </dd> </dl> <dl id='articles'> <h3>Cross submissions (showing 18 of 18 entries)</h3> <dt> <a name='item27'>[27]</a> <a href ="/abs/2503.15498" title="Abstract" id="2503.15498"> arXiv:2503.15498 </a> (cross-list from cs.HC) [<a href="/pdf/2503.15498" title="Download PDF" id="pdf-2503.15498" aria-labelledby="pdf-2503.15498">pdf</a>, <a href="https://arxiv.org/html/2503.15498v1" title="View HTML" id="html-2503.15498" aria-labelledby="html-2503.15498" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2503.15498" title="Other formats" id="oth-2503.15498" aria-labelledby="oth-2503.15498">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Revival: Collaborative Artistic Creation through Human-AI Interactions in Musical Creativity </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Lee,+K+J+M">Keon Ju M. Lee</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Pasquier,+P">Philippe Pasquier</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Yuri,+J">Jun Yuri</a></div> <div class='list-comments mathjax'><span class='descriptor'>Comments:</span> Keon Ju M. Lee, Philippe Pasquier and Jun Yuri. 2024. In Proceedings of the Creativity and Generative AI NIPS (Neural Information Processing Systems) Workshop </div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Human-Computer Interaction (cs.HC)</span>; Artificial Intelligence (cs.AI); Multiagent Systems (cs.MA); Multimedia (cs.MM); Sound (cs.SD); Audio and Speech Processing (eess.AS) </div> <p class='mathjax'> Revival is an innovative live audiovisual performance and music improvisation by our artist collective K-Phi-A, blending human and AI musicianship to create electronic music with audio-reactive visuals. 
The performance features real-time co-creative improvisation between a percussionist, an electronic music artist, and AI musical agents. Trained in works by deceased composers and the collective's compositions, these agents dynamically respond to human input and emulate complex musical styles. An AI-driven visual synthesizer, guided by a human VJ, produces visuals that evolve with the musical landscape. Revival showcases the potential of AI and human collaboration in improvisational artistic creation. </p> </div> </dd> <dt> <a name='item28'>[28]</a> <a href ="/abs/2503.15501" title="Abstract" id="2503.15501"> arXiv:2503.15501 </a> (cross-list from cs.HC) [<a href="/pdf/2503.15501" title="Download PDF" id="pdf-2503.15501" aria-labelledby="pdf-2503.15501">pdf</a>, <a href="https://arxiv.org/html/2503.15501v1" title="View HTML" id="html-2503.15501" aria-labelledby="html-2503.15501" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2503.15501" title="Other formats" id="oth-2503.15501" aria-labelledby="oth-2503.15501">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Development of an Inclusive Educational Platform Using Open Technologies and Machine Learning: A Case Study on Accessibility Enhancement </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Togni,+J">Jimi Togni</a></div> <div class='list-comments mathjax'><span class='descriptor'>Comments:</span> 14 pages, 1 figure </div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Human-Computer Interaction (cs.HC)</span>; Artificial Intelligence (cs.AI); Machine Learning (cs.LG); Audio and Speech Processing (eess.AS) </div> <p class='mathjax'> This study addresses the pressing challenge of educational inclusion for students with special needs by proposing and developing an inclusive educational platform. 
Integrating machine learning, natural language processing, and cross-platform interfaces, the platform features key functionalities such as speech recognition functionality to support voice commands and text generation via voice input; real-time object recognition using the YOLOv5 model, adapted for educational environments; Grapheme-to-Phoneme (G2P) conversion for Text-to-Speech systems using seq2seq models with attention, ensuring natural and fluent voice synthesis; and the development of a cross-platform mobile application in Flutter with on-device inference execution using TensorFlow Lite. The results demonstrated high accuracy, usability, and positive impact in educational scenarios, validating the proposal as an effective tool for educational inclusion. This project underscores the importance of open and accessible technologies in promoting inclusive and quality education. </p> </div> </dd> <dt> <a name='item29'>[29]</a> <a href ="/abs/2503.15581" title="Abstract" id="2503.15581"> arXiv:2503.15581 </a> (cross-list from cs.LG) [<a href="/pdf/2503.15581" title="Download PDF" id="pdf-2503.15581" aria-labelledby="pdf-2503.15581">pdf</a>, <a href="https://arxiv.org/html/2503.15581v1" title="View HTML" id="html-2503.15581" aria-labelledby="html-2503.15581" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2503.15581" title="Other formats" id="oth-2503.15581" aria-labelledby="oth-2503.15581">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Performance-bounded Online Ensemble Learning Method Based on Multi-armed bandits and Its Applications in Real-time Safety Assessment </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Hu,+S">Songqiao Hu</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Liu,+Z">Zeyi Liu</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=He,+X">Xiao He</a></div> <div class='list-comments 
mathjax'><span class='descriptor'>Comments:</span> 14 pages, 9 figures </div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Machine Learning (cs.LG)</span>; Systems and Control (eess.SY) </div> <p class='mathjax'> Ensemble learning plays a crucial role in practical applications of online learning due to its enhanced classification performance and adaptable adjustment mechanisms. However, most weight allocation strategies in ensemble learning are heuristic, making it challenging to theoretically guarantee that the ensemble classifier outperforms its base classifiers. To address this issue, a performance-bounded online ensemble learning method based on multi-armed bandits, named PB-OEL, is proposed in this paper. Specifically, multi-armed bandit with expert advice is incorporated into online ensemble learning, aiming to update the weights of base classifiers and make predictions. A theoretical framework is established to bound the performance of the ensemble classifier relative to base classifiers. By setting expert advice of bandits, the bound exceeds the performance of any base classifier when the length of data stream is sufficiently large. Additionally, performance bounds for scenarios with limited annotations are also derived. Numerous experiments on benchmark datasets and a dataset of real-time safety assessment tasks are conducted. The experimental results validate the theoretical bound to a certain extent and demonstrate that the proposed method outperforms existing state-of-the-art methods. 
</p> </div> </dd> <dt> <a name='item30'>[30]</a> <a href ="/abs/2503.15618" title="Abstract" id="2503.15618"> arXiv:2503.15618 </a> (cross-list from cs.IT) [<a href="/pdf/2503.15618" title="Download PDF" id="pdf-2503.15618" aria-labelledby="pdf-2503.15618">pdf</a>, <a href="https://arxiv.org/html/2503.15618v1" title="View HTML" id="html-2503.15618" aria-labelledby="html-2503.15618" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2503.15618" title="Other formats" id="oth-2503.15618" aria-labelledby="oth-2503.15618">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> On the Secrecy Performance of $α$-$\mathcal{F}$ Channels with Pointing Errors </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Neves,+G+M+C">Gabriel M. C. Neves</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Silva,+H+S">Hugerles S. Silva</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Silva,+H+T+P">Higo T. P. Silva</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Queiroz,+W+J+L">Wamberto J. L. Queiroz</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Figueiredo,+F+A+P">Felipe A. P. Figueiredo</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=de+Souza,+R+A+A">Rausley A. A. de Souza</a></div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Information Theory (cs.IT)</span>; Signal Processing (eess.SP) </div> <p class='mathjax'> This paper investigates the physical layer security (PLS) performance of $\alpha$-$\mathcal{F}$ fading channels with pointing errors under passive and active eavesdropping scenarios. Novel analytical expressions are derived for key PLS metrics, including the probability of strictly positive secrecy capacity, the average secrecy capacity, and the secure outage probability. 
An asymptotic analysis is also investigated to provide further insights into the system behavior under high signal-to-noise ratio conditions. The analytical results are validated through Monte Carlo simulations, with several performance curves presented for a range of channel and system parameters. All expressions derived in this work are original and have not been previously published. </p> </div> </dd> <dt> <a name='item31'>[31]</a> <a href ="/abs/2503.15694" title="Abstract" id="2503.15694"> arXiv:2503.15694 </a> (cross-list from quant-ph) [<a href="/pdf/2503.15694" title="Download PDF" id="pdf-2503.15694" aria-labelledby="pdf-2503.15694">pdf</a>, <a href="https://arxiv.org/html/2503.15694v1" title="View HTML" id="html-2503.15694" aria-labelledby="html-2503.15694" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2503.15694" title="Other formats" id="oth-2503.15694" aria-labelledby="oth-2503.15694">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Regulation of a continuously monitored quantum harmonic oscillator with inefficient detectors </div> <div class='list-authors'><a href="https://arxiv.org/search/quant-ph?searchtype=author&query=Sabbagh,+R">Ralph Sabbagh</a>, <a href="https://arxiv.org/search/quant-ph?searchtype=author&query=Miangolarra,+O+M">Olga Movilla Miangolarra</a>, <a href="https://arxiv.org/search/quant-ph?searchtype=author&query=Georgiou,+T+T">Tryphon T. Georgiou</a></div> <div class='list-comments mathjax'><span class='descriptor'>Comments:</span> 12 pages, 5 figures </div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Quantum Physics (quant-ph)</span>; Systems and Control (eess.SY) </div> <p class='mathjax'> We study the control problem of regulating the purity of a quantum harmonic oscillator in a Gaussian state via weak measurements. 
Specifically, we assume time-invariant Hamiltonian dynamics and that control is exerted via the back-action induced from monitoring the oscillator's position and momentum observables; the manipulation of the detector measurement strengths regulates the purity of the target Gaussian quantum state. After briefly drawing connections between Gaussian quantum dynamics and stochastic control, we focus on the effect of inefficient detectors and derive closed-form expressions for the transient and steady-state dynamics of the state covariance. We highlight the degradation of attainable purity that is due to inefficient detectors, as compared to that dictated by the Robertson-Schrödinger uncertainty relation. Our results suggest that quantum correlations can enhance the purity at steady-state. The quantum harmonic oscillator represents a basic system where analytic formulae may provide insights into the role of inefficient measurements in quantum control; the gained insights are pertinent to measurement-based quantum engines and cooling experiments. 
</p> </div> </dd> <dt> <a name='item32'>[32]</a> <a href ="/abs/2503.15769" title="Abstract" id="2503.15769"> arXiv:2503.15769 </a> (cross-list from cs.DC) [<a href="/pdf/2503.15769" title="Download PDF" id="pdf-2503.15769" aria-labelledby="pdf-2503.15769">pdf</a>, <a href="https://arxiv.org/html/2503.15769v1" title="View HTML" id="html-2503.15769" aria-labelledby="html-2503.15769" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2503.15769" title="Other formats" id="oth-2503.15769" aria-labelledby="oth-2503.15769">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Prediction of Permissioned Blockchain Performance for Resource Scaling Configurations </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Jung,+S">Seungwoo Jung</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Yoo,+Y">Yeonho Yoo</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Yang,+G">Gyeongsik Yang</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Yoo,+C">Chuck Yoo</a></div> <div class='list-journal-ref'><span class='descriptor'>Journal-ref:</span> ICT Express, Volume 10, Issue 6, December 2024, Pages 1253-1258 </div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Distributed, Parallel, and Cluster Computing (cs.DC)</span>; Machine Learning (cs.LG); Systems and Control (eess.SY) </div> <p class='mathjax'> Blockchain is increasingly offered as blockchain-as-a-service (BaaS) by cloud service providers. However, configuring BaaS appropriately for optimal performance and reliability resorts to try-and-error. A key challenge is that BaaS is often perceived as a ``black-box,'' leading to uncertainties in performance and resource provisioning. Previous studies attempted to address this challenge; however, the impacts of both vertical and horizontal scaling remain elusive. 
To this end, we present machine learning-based models to predict network reliability and throughput based on scaling configurations. In our evaluation, the models exhibit prediction errors of ~1.9%, which is highly accurate and can be applied in the real-world. </p> </div> </dd> <dt> <a name='item33'>[33]</a> <a href ="/abs/2503.15819" title="Abstract" id="2503.15819"> arXiv:2503.15819 </a> (cross-list from cs.RO) [<a href="/pdf/2503.15819" title="Download PDF" id="pdf-2503.15819" aria-labelledby="pdf-2503.15819">pdf</a>, <a href="https://arxiv.org/html/2503.15819v1" title="View HTML" id="html-2503.15819" aria-labelledby="html-2503.15819" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2503.15819" title="Other formats" id="oth-2503.15819" aria-labelledby="oth-2503.15819">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Control Pneumatic Soft Bending Actuator with Online Learning Pneumatic Physical Reservoir Computing </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Shen,+J">Junyi Shen</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Miyazaki,+T">Tetsuro Miyazaki</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Kawashima,+K">Kenji Kawashima</a></div> <div class='list-comments mathjax'><span class='descriptor'>Comments:</span> 8 pages, 13 figures, IEEE-RAS International Conference on Soft Robotics (RoboSoft 2025) </div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Robotics (cs.RO)</span>; Machine Learning (cs.LG); Systems and Control (eess.SY) </div> <p class='mathjax'> The intrinsic nonlinearities of soft robots present significant control but simultaneously provide them with rich computational potential. Reservoir computing (RC) has shown effectiveness in online learning systems for controlling nonlinear systems such as soft actuators. 
Conventional RC can be extended into physical reservoir computing (PRC) by leveraging the nonlinear dynamics of soft actuators for computation. This paper introduces a PRC-based online learning framework to control the motion of a pneumatic soft bending actuator, utilizing another pneumatic soft actuator as the PRC model. Unlike conventional designs requiring two RC models, the proposed control system employs a more compact architecture with a single RC model. Additionally, the framework enables zero-shot online learning, addressing limitations of previous PRC-based control systems reliant on offline training. Simulations and experiments validated the performance of the proposed system. Experimental results indicate that the PRC model achieved superior control performance compared to a linear model, reducing the root-mean-square error (RMSE) by an average of over 37% in bending motion control tasks. The proposed PRC-based online learning control framework provides a novel approach for harnessing physical systems' inherent nonlinearities to enhance the control of soft actuators. 
</p> </div> </dd> <dt> <a name='item34'>[34]</a> <a href ="/abs/2503.15915" title="Abstract" id="2503.15915"> arXiv:2503.15915 </a> (cross-list from cs.RO) [<a href="/pdf/2503.15915" title="Download PDF" id="pdf-2503.15915" aria-labelledby="pdf-2503.15915">pdf</a>, <a href="https://arxiv.org/html/2503.15915v1" title="View HTML" id="html-2503.15915" aria-labelledby="html-2503.15915" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2503.15915" title="Other formats" id="oth-2503.15915" aria-labelledby="oth-2503.15915">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Development of a Magnetorheological Hand Exoskeleton Featuring High Force-to-power Ratio for Enhancing Grip Endurance </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Li,+W">Wenbo Li</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Mai,+X">Xianlong Mai</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Li,+Y">Ying Li</a></div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Robotics (cs.RO)</span>; Systems and Control (eess.SY) </div> <p class='mathjax'> Hand exoskeletons have significant potential in labor-intensive fields by mitigating hand grip fatigue, enhancing hand strength, and preventing injuries. However, most traditional hand exoskeletons are driven by motors whose output force is limited under constrained installation conditions. 
In addition, they also come with the disadvantages of high power consumption, complex and bulky assistive systems, and high instability. In this work, we develop a novel hand exoskeleton integrated with magnetorheological (MR) clutches that offers a high force-to-power ratio to improve grip endurance. The clutch features an enhanced structure design, a micro roller enhancing structure, which can significantly boost output forces. The experimental data demonstrate that the clutch can deliver a peak holding force of 380 N with a consumption of 1.48 W, yielding a force-to-power ratio of 256.75N/W, which is 2.35 times higher than the best reported actuator used for hand exoskeletons. The designed MR hand exoskeleton is highly integrated and comprises an exoskeleton frame, MR clutches, a control unit, and a battery. Evaluations through static grip endurance tests and dynamic carrying and lifting tests confirm that the MR hand exoskeleton can effectively reduce muscle fatigue, extend grip endurance, and minimize injuries. These findings highlight its strong potential for practical applications in repetitive tasks such as carrying and lifting in industrial settings. </p> </div> </dd> <dt> <a name='item35'>[35]</a> <a href ="/abs/2503.15968" title="Abstract" id="2503.15968"> arXiv:2503.15968 </a> (cross-list from cs.CR) [<a href="/pdf/2503.15968" title="Download PDF" id="pdf-2503.15968" aria-labelledby="pdf-2503.15968">pdf</a>, <a href="https://arxiv.org/html/2503.15968v1" title="View HTML" id="html-2503.15968" aria-labelledby="html-2503.15968" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2503.15968" title="Other formats" id="oth-2503.15968" aria-labelledby="oth-2503.15968">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Digital Asset Data Lakehouse. 
The concept based on a blockchain research center </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Bag,+R+C">Raul Cristian Bag</a></div> <div class='list-comments mathjax'><span class='descriptor'>Comments:</span> 8 pages, system architecture </div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Cryptography and Security (cs.CR)</span>; Databases (cs.DB); Systems and Control (eess.SY) </div> <p class='mathjax'> In the rapidly evolving landscape of digital assets and blockchain technologies, the necessity for robust, scalable, and secure data management platforms has never been more critical. This paper introduces a novel software architecture designed to meet these demands by leveraging the inherent strengths of cloud-native technologies and modular micro-service based architectures, to facilitate efficient data management, storage and access, across different stakeholders. We detail the architectural design, including its components and interactions, and discuss how it addresses common challenges in managing blockchain data and digital assets, such as scalability, data siloing, and security vulnerabilities. We demonstrate the capabilities of the platform by employing it into multiple real-life scenarios, namely providing data in near real-time to scientists in help with their research. Our results indicate that the proposed architecture not only enhances the efficiency and scalability of distributed data management but also opens new avenues for innovation in the research reproducibility area. This work lays the groundwork for future research and development in machine learning operations systems, offering a scalable and secure framework for the burgeoning digital economy. 
</p> </div> </dd> <dt> <a name='item36'>[36]</a> <a href ="/abs/2503.15984" title="Abstract" id="2503.15984"> arXiv:2503.15984 </a> (cross-list from cs.CV) [<a href="/pdf/2503.15984" title="Download PDF" id="pdf-2503.15984" aria-labelledby="pdf-2503.15984">pdf</a>, <a href="https://arxiv.org/html/2503.15984v1" title="View HTML" id="html-2503.15984" aria-labelledby="html-2503.15984" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2503.15984" title="Other formats" id="oth-2503.15984" aria-labelledby="oth-2503.15984">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> DIPLI: Deep Image Prior Lucky Imaging for Blind Astronomical Image Restoration </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Singh,+S">Suraj Singh</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Batsheva,+A">Anastasia Batsheva</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Rogov,+O+Y">Oleg Y. Rogov</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Bouridane,+A">Ahmed Bouridane</a></div> <div class='list-comments mathjax'><span class='descriptor'>Comments:</span> 10 pages, 7 figures, 2 tables </div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Computer Vision and Pattern Recognition (cs.CV)</span>; Instrumentation and Methods for Astrophysics (astro-ph.IM); Artificial Intelligence (cs.AI); Image and Video Processing (eess.IV) </div> <p class='mathjax'> Contemporary image restoration and super-resolution techniques effectively harness deep neural networks, markedly outperforming traditional methods. However, astrophotography presents unique challenges for deep learning due to limited training data. 
This work explores hybrid strategies, such as the Deep Image Prior (DIP) model, which facilitates blind training but is susceptible to overfitting, artifact generation, and instability when handling noisy images. We propose enhancements to the DIP model's baseline performance through several advanced techniques. First, we refine the model to process multiple frames concurrently, employing the Back Projection method and the TVNet model. Next, we adopt a Markov approach incorporating Monte Carlo estimation, Langevin dynamics, and a variational input technique to achieve unbiased estimates with minimal variance and counteract overfitting effectively. Collectively, these modifications reduce the likelihood of noise learning and mitigate loss function fluctuations during training, enhancing result stability. We validated our algorithm across multiple image sets of astronomical and celestial objects, achieving performance that not only mitigates limitations of Lucky Imaging, a classical computer vision technique that remains a standard in astronomical image reconstruction but surpasses the original DIP model, state of the art transformer- and diffusion-based models, underscoring the significance of our improvements. 
</p> </div> </dd> <dt> <a name='item37'>[37]</a> <a href ="/abs/2503.16076" title="Abstract" id="2503.16076"> arXiv:2503.16076 </a> (cross-list from math.OC) [<a href="/pdf/2503.16076" title="Download PDF" id="pdf-2503.16076" aria-labelledby="pdf-2503.16076">pdf</a>, <a href="/format/2503.16076" title="Other formats" id="oth-2503.16076" aria-labelledby="oth-2503.16076">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Control Lyapunov Function Design via Configuration-Constrained Polyhedral Computing </div> <div class='list-authors'><a href="https://arxiv.org/search/math?searchtype=author&query=Houska,+B">Boris Houska</a>, <a href="https://arxiv.org/search/math?searchtype=author&query=M%C3%BCller,+M+A">Matthias A. Müller</a>, <a href="https://arxiv.org/search/math?searchtype=author&query=Villanueva,+M+E">Mario E. Villanueva</a></div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Optimization and Control (math.OC)</span>; Systems and Control (eess.SY) </div> <p class='mathjax'> This paper proposes novel approaches for designing control Lyapunov functions (CLFs) for constrained linear systems. We leverage recent configuration-constrained polyhedral computing techniques to devise piecewise affine convex CLFs. Additionally, we generalize these methods to uncertain systems with both additive and multiplicative disturbances. The proposed design methods are capable of approximating the infinite horizon cost function of both nominal and min-max optimal control problems by solving a single, one-stage, convex optimization problem. As such, these methods find practical applications in explicit controller design as well as in determining terminal regions and cost functions for nominal and min-max model predictive control (MPC). Numerical examples illustrate the effectiveness of this approach. 
</p> </div> </dd> <dt> <a name='item38'>[38]</a> <a href ="/abs/2503.16107" title="Abstract" id="2503.16107"> arXiv:2503.16107 </a> (cross-list from cs.LG) [<a href="/pdf/2503.16107" title="Download PDF" id="pdf-2503.16107" aria-labelledby="pdf-2503.16107">pdf</a>, <a href="/format/2503.16107" title="Other formats" id="oth-2503.16107" aria-labelledby="oth-2503.16107">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Learn to Bid as a Price-Maker Wind Power Producer </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Singhal,+S">Shobhit Singhal</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Fochesato,+M">Marta Fochesato</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Aolaritei,+L">Liviu Aolaritei</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=D%C3%B6rfler,+F">Florian Dörfler</a></div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Machine Learning (cs.LG)</span>; Systems and Control (eess.SY) </div> <p class='mathjax'> Wind power producers (WPPs) participating in short-term power markets face significant imbalance costs due to their non-dispatchable and variable production. While some WPPs have a large enough market share to influence prices with their bidding decisions, existing optimal bidding methods rarely account for this aspect. Price-maker approaches typically model bidding as a bilevel optimization problem, but these methods require complex market models, estimating other participants' actions, and are computationally demanding. To address these challenges, we propose an online learning algorithm that leverages contextual information to optimize WPP bids in the price-maker setting. We formulate the strategic bidding problem as a contextual multi-armed bandit, ensuring provable regret minimization. 
The algorithm's performance is evaluated against various benchmark strategies using a numerical simulation of the German day-ahead and real-time markets. </p> </div> </dd> <dt> <a name='item39'>[39]</a> <a href ="/abs/2503.16192" title="Abstract" id="2503.16192"> arXiv:2503.16192 </a> (cross-list from cs.LG) [<a href="/pdf/2503.16192" title="Download PDF" id="pdf-2503.16192" aria-labelledby="pdf-2503.16192">pdf</a>, <a href="https://arxiv.org/html/2503.16192v1" title="View HTML" id="html-2503.16192" aria-labelledby="html-2503.16192" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2503.16192" title="Other formats" id="oth-2503.16192" aria-labelledby="oth-2503.16192">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Nonparametric Bellman Mappings for Value Iteration in Distributed Reinforcement Learning </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Akiyama,+Y">Yuki Akiyama</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Slavakis,+K">Konstantinos Slavakis</a></div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Machine Learning (cs.LG)</span>; Signal Processing (eess.SP) </div> <p class='mathjax'> This paper introduces novel Bellman mappings (B-Maps) for value iteration (VI) in distributed reinforcement learning (DRL), where multiple agents operate over a network without a centralized fusion node. Each agent constructs its own nonparametric B-Map for VI while communicating only with direct neighbors to achieve consensus. These B-Maps operate on Q-functions represented in a reproducing kernel Hilbert space, enabling a nonparametric formulation that allows for flexible, agent-specific basis function design. 
Unlike existing DRL methods that restrict information exchange to Q-function estimates, the proposed framework also enables agents to share basis information in the form of covariance matrices, capturing additional structural details. A theoretical analysis establishes linear convergence rates for both Q-function and covariance-matrix estimates toward their consensus values. The optimal learning rates for consensus-based updates are dictated by the ratio of the smallest positive eigenvalue to the largest one of the network's Laplacian matrix. Furthermore, each nodal Q-function estimate is shown to lie very close to the fixed point of a centralized nonparametric B-Map, effectively allowing the proposed DRL design to approximate the performance of a centralized fusion center. Numerical experiments on two well-known control problems demonstrate the superior performance of the proposed nonparametric B-Maps compared to prior methods. Notably, the results reveal a counter-intuitive finding: although the proposed approach involves greater information exchange -- specifically through the sharing of covariance matrices -- it achieves the desired performance with lower cumulative communication cost than existing DRL schemes, highlighting the crucial role of basis information in accelerating the learning process. 
</p> </div> </dd> <dt> <a name='item40'>[40]</a> <a href ="/abs/2503.16227" title="Abstract" id="2503.16227"> arXiv:2503.16227 </a> (cross-list from cs.HC) [<a href="/pdf/2503.16227" title="Download PDF" id="pdf-2503.16227" aria-labelledby="pdf-2503.16227">pdf</a>, <a href="https://arxiv.org/html/2503.16227v1" title="View HTML" id="html-2503.16227" aria-labelledby="html-2503.16227" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2503.16227" title="Other formats" id="oth-2503.16227" aria-labelledby="oth-2503.16227">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Flight Testing an Optionally Piloted Aircraft: a Case Study on Trust Dynamics in Human-Autonomy Teaming </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Wang,+J+C">Jeremy C.-H. Wang</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Hou,+M">Ming Hou</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Dunwoody,+D">David Dunwoody</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Ilievski,+M">Marko Ilievski</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Tomasi,+J">Justin Tomasi</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Chao,+E">Edward Chao</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Pigeon,+C">Carl Pigeon</a></div> <div class='list-comments mathjax'><span class='descriptor'>Comments:</span> IEEE International Conference on Human-Machine Systems 2025, keywords: trust, human factors, aviation, safety-critical, human-autonomy teaming </div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Human-Computer Interaction (cs.HC)</span>; Artificial Intelligence (cs.AI); Emerging Technologies (cs.ET); Machine Learning (cs.LG); Systems and Control (eess.SY) </div> <p class='mathjax'> This paper examines how trust is formed, maintained, or 
diminished over time in the context of human-autonomy teaming with an optionally piloted aircraft. Whereas traditional factor-based trust models offer a static representation of human confidence in technology, here we discuss how variations in the underlying factors lead to variations in trust, trust thresholds, and human behaviours. Over 200 hours of flight test data collected over a multi-year test campaign from 2021 to 2023 were reviewed. The dispositional-situational-learned, process-performance-purpose, and IMPACTS homeostasis trust models are applied to illuminate trust trends during nominal autonomous flight operations. The results offer promising directions for future studies on trust dynamics and design-for-trust in human-autonomy teaming. </p> </div> </dd> <dt> <a name='item41'>[41]</a> <a href ="/abs/2503.16256" title="Abstract" id="2503.16256"> arXiv:2503.16256 </a> (cross-list from physics.app-ph) [<a href="/pdf/2503.16256" title="Download PDF" id="pdf-2503.16256" aria-labelledby="pdf-2503.16256">pdf</a>, <a href="https://arxiv.org/html/2503.16256v1" title="View HTML" id="html-2503.16256" aria-labelledby="html-2503.16256" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2503.16256" title="Other formats" id="oth-2503.16256" aria-labelledby="oth-2503.16256">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Scalable Multiport Antenna Array Characterization with PCB-Realized Tunable Load Network Providing Additional "Virtual" VNA Ports </div> <div class='list-authors'><a href="https://arxiv.org/search/physics?searchtype=author&query=Tapie,+J">Jean Tapie</a>, <a href="https://arxiv.org/search/physics?searchtype=author&query=del+Hougne,+P">Philipp del Hougne</a></div> <div class='list-comments mathjax'><span class='descriptor'>Comments:</span> 5 pages including 5 figures </div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span 
class="primary-subject">Applied Physics (physics.app-ph)</span>; Signal Processing (eess.SP) </div> <p class='mathjax'> We prototype a PCB-realized tunable load network whose ports serve as additional "virtual" VNA ports in a "Virtual VNA" measurement setup. The latter enables the estimation of a many-port antenna array's scattering matrix with a few-port VNA, without any reconnections. We experimentally validate the approach for various eight-element antenna arrays in an anechoic chamber in the 700-900 MHz regime. We also improve the noise robustness of a step of the "Virtual VNA" post-processing algorithms by leveraging spectral correlations. Altogether, our PCB-realized VNA Extension Kit offers a scalable solution to characterize very large antenna arrays because of its low cost, small footprint, fully automated operation, and modular nature. </p> </div> </dd> <dt> <a name='item42'>[42]</a> <a href ="/abs/2503.16302" title="Abstract" id="2503.16302"> arXiv:2503.16302 </a> (cross-list from cs.CV) [<a href="/pdf/2503.16302" title="Download PDF" id="pdf-2503.16302" aria-labelledby="pdf-2503.16302">pdf</a>, <a href="https://arxiv.org/html/2503.16302v1" title="View HTML" id="html-2503.16302" aria-labelledby="html-2503.16302" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2503.16302" title="Other formats" id="oth-2503.16302" aria-labelledby="oth-2503.16302">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Unleashing Vecset Diffusion Model for Fast Shape Generation </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Lai,+Z">Zeqiang Lai</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Zhao,+Y">Yunfei Zhao</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Zhao,+Z">Zibo Zhao</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Liu,+H">Haolin Liu</a>, <a 
href="https://arxiv.org/search/cs?searchtype=author&query=Wang,+F">Fuyun Wang</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Shi,+H">Huiwen Shi</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Yang,+X">Xianghui Yang</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Lin,+Q">Qinxiang Lin</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Huang,+J">Jinwei Huang</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Liu,+Y">Yuhong Liu</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Jiang,+J">Jie Jiang</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Guo,+C">Chunchao Guo</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Yue,+X">Xiangyu Yue</a></div> <div class='list-comments mathjax'><span class='descriptor'>Comments:</span> Technical report </div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Computer Vision and Pattern Recognition (cs.CV)</span>; Artificial Intelligence (cs.AI); Image and Video Processing (eess.IV) </div> <p class='mathjax'> 3D shape generation has greatly flourished through the development of so-called "native" 3D diffusion, particularly through the Vecset Diffusion Model (VDM). While recent advancements have shown promising results in generating high-resolution 3D shapes, VDM still struggles with high-speed generation. Challenges exist because of difficulties not only in accelerating diffusion sampling but also VAE decoding in VDM, areas under-explored in previous works. To address these challenges, we present FlashVDM, a systematic framework for accelerating both VAE and DiT in VDM. For DiT, FlashVDM enables flexible diffusion sampling with as few as 5 inference steps and comparable quality, which is made possible by stabilizing consistency distillation with our newly introduced Progressive Flow Distillation. 
For VAE, we introduce a lightning vecset decoder equipped with Adaptive KV Selection, Hierarchical Volume Decoding, and Efficient Network Design. By exploiting the locality of the vecset and the sparsity of shape surface in the volume, our decoder drastically lowers FLOPs, minimizing the overall decoding overhead. We apply FlashVDM to Hunyuan3D-2 to obtain Hunyuan3D-2 Turbo. Through systematic evaluation, we show that our model significantly outperforms existing fast 3D generation methods, achieving comparable performance to the state-of-the-art while reducing inference time by over 45x for reconstruction and 32x for generation. Code and models are available at <a href="https://github.com/Tencent/FlashVDM" rel="external noopener nofollow" class="link-external link-https">this https URL</a>. </p> </div> </dd> <dt> <a name='item43'>[43]</a> <a href ="/abs/2503.16357" title="Abstract" id="2503.16357"> arXiv:2503.16357 </a> (cross-list from cs.CV) [<a href="/pdf/2503.16357" title="Download PDF" id="pdf-2503.16357" aria-labelledby="pdf-2503.16357">pdf</a>, <a href="https://arxiv.org/html/2503.16357v1" title="View HTML" id="html-2503.16357" aria-labelledby="html-2503.16357" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2503.16357" title="Other formats" id="oth-2503.16357" aria-labelledby="oth-2503.16357">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> UniSync: A Unified Framework for Audio-Visual Synchronization </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Feng,+T">Tao Feng</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Xie,+Y">Yifan Xie</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Guan,+X">Xun Guan</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Song,+J">Jiyuan Song</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Liu,+Z">Zhou Liu</a>, <a 
href="https://arxiv.org/search/cs?searchtype=author&query=Ma,+F">Fei Ma</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Yu,+F">Fei Yu</a></div> <div class='list-comments mathjax'><span class='descriptor'>Comments:</span> 7 pages, 3 figures, accepted by ICME 2025 </div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Computer Vision and Pattern Recognition (cs.CV)</span>; Sound (cs.SD); Audio and Speech Processing (eess.AS) </div> <p class='mathjax'> Precise audio-visual synchronization in speech videos is crucial for content quality and viewer comprehension. Existing methods have made significant strides in addressing this challenge through rule-based approaches and end-to-end learning techniques. However, these methods often rely on limited audio-visual representations and suboptimal learning strategies, potentially constraining their effectiveness in more complex scenarios. To address these limitations, we present UniSync, a novel approach for evaluating audio-visual synchronization using embedding similarities. UniSync offers broad compatibility with various audio representations (e.g., Mel spectrograms, HuBERT) and visual representations (e.g., RGB images, face parsing maps, facial landmarks, 3DMM), effectively handling their significant dimensional differences. We enhance the contrastive learning framework with a margin-based loss component and cross-speaker unsynchronized pairs, improving discriminative capabilities. UniSync outperforms existing methods on standard datasets and demonstrates versatility across diverse audio-visual representations. Its integration into talking face generation frameworks enhances synchronization quality in both natural and AI-generated content. 
</p> </div> </dd> <dt> <a name='item44'>[44]</a> <a href ="/abs/2503.16366" title="Abstract" id="2503.16366"> arXiv:2503.16366 </a> (cross-list from physics.optics) [<a href="/pdf/2503.16366" title="Download PDF" id="pdf-2503.16366" aria-labelledby="pdf-2503.16366">pdf</a>, <a href="https://arxiv.org/html/2503.16366v1" title="View HTML" id="html-2503.16366" aria-labelledby="html-2503.16366" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2503.16366" title="Other formats" id="oth-2503.16366" aria-labelledby="oth-2503.16366">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Dynamic Metasurface-Backed Luneburg Lens for Multiplexed Backscatter Communication </div> <div class='list-authors'><a href="https://arxiv.org/search/physics?searchtype=author&query=Kim,+S">Samuel Kim</a>, <a href="https://arxiv.org/search/physics?searchtype=author&query=Sleasman,+T">Tim Sleasman</a>, <a href="https://arxiv.org/search/physics?searchtype=author&query=Rakovsky,+A">Avrami Rakovsky</a>, <a href="https://arxiv.org/search/physics?searchtype=author&query=Awadallah,+R">Ra'id Awadallah</a>, <a href="https://arxiv.org/search/physics?searchtype=author&query=Shrekenhamer,+D+B">David B. Shrekenhamer</a></div> <div class='list-comments mathjax'><span class='descriptor'>Comments:</span> 13 pages, 8 figures </div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Optics (physics.optics)</span>; Signal Processing (eess.SP) </div> <p class='mathjax'> Backscatter communications is attractive for its low power requirements due to the lack of actively radiating components; however, commonly used devices are typically limited in range and functionality. Here, we design and demonstrate a flattened Luneburg lens combined with a spatially-tunable dynamic metasurface to create a low-power backscatter communicator. 
The Luneburg lens is a spherically-symmetric lens that focuses a collimated beam from any direction, enabling a wide field-of-view with no aberrations. By applying quasi-conformal transformation optics (QCTO), we design a flattened Luneburg lens to facilitate its seamless interface with the planar metasurface. The gradient index of the Luneburg lens is realized through additive manufacturing. We show that the flattened Luneburg lens with a reflective surface at the flattened focal plane is able to achieve diffraction-limited retroreflection, enabling long-range backscatter communication. When an interrogator transmits towards the metasurface-backed Luneburg lens, the device can modulate the reflected signal phase across a wide field of regard to communicate data. We experimentally show that the spatial control over the metasurface allows different bit streams to be simultaneously communicated in different directions. Additionally, we show that the device is able to prevent eavesdroppers from receiving information, thus securing communications. </p> </div> </dd> </dl> <dl id='articles'> <h3>Replacement submissions (showing 31 of 31 entries)</h3> <dt> <a name='item45'>[45]</a> <a href ="/abs/2108.06062" title="Abstract" id="2108.06062"> arXiv:2108.06062 </a> (replaced) [<a href="/pdf/2108.06062" title="Download PDF" id="pdf-2108.06062" aria-labelledby="pdf-2108.06062">pdf</a>, <a href="/format/2108.06062" title="Other formats" id="oth-2108.06062" aria-labelledby="oth-2108.06062">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Worst-Case Services and State-Based Scheduling </div> <div class='list-authors'><a href="https://arxiv.org/search/eess?searchtype=author&query=Xu,+Y">Yike Xu</a>, <a href="https://arxiv.org/search/eess?searchtype=author&query=Andersland,+M+S">Mark S. 
Andersland</a></div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Systems and Control (eess.SY)</span>; Optimization and Control (math.OC) </div> <p class='mathjax'> In this paper, we shed new light on a classical scheduling problem: given a slot-timed, constant-capacity server, what short-run scheduling decisions must be made to provide long-run service guarantees to competing flows of unit-sized tasks? We model each flow's long-run guarantee as a worst-case service that maps each queued arrival vector recording the flow's cumulative task arrivals, including those initially queued, to a worst-case acceptable departure vector lower-bounding its cumulative served tasks. We show that these maps are states that can be updated as tasks arrive and are served, introduce state-based scheduling, find the schedulability condition necessary and sufficient to maintain all flows' long-run guarantees, and use this condition to identify all short-run scheduling decisions that preserve schedulability. Our framework is general but computationally complex. To reduce complexity, we consider three specializations. First, we show that when satisfactory short-run scheduling decisions exist, at least one can be efficiently identified by maximizing the server's capacity slack, a generalization of the earliest-deadline-first rule. Second, we show that a special class of worst-case services, min-plus services, can be efficiently specified and updated using properties of the min-plus algebra. Finally, we show that efficiency can be further improved by restricting attention to a min-plus service subclass, dual-curve services. This last specialization turns out to be a dynamic extension of service curves that maintains all essential features of our framework while approaching near practical viability. 
</p> </div> </dd> <dt> <a name='item46'>[46]</a> <a href ="/abs/2209.12075" title="Abstract" id="2209.12075"> arXiv:2209.12075 </a> (replaced) [<a href="/pdf/2209.12075" title="Download PDF" id="pdf-2209.12075" aria-labelledby="pdf-2209.12075">pdf</a>, <a href="https://arxiv.org/html/2209.12075v3" title="View HTML" id="html-2209.12075" aria-labelledby="html-2209.12075" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2209.12075" title="Other formats" id="oth-2209.12075" aria-labelledby="oth-2209.12075">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> S^2-Transformer for Mask-Aware Hyperspectral Image Reconstruction </div> <div class='list-authors'><a href="https://arxiv.org/search/eess?searchtype=author&query=Wang,+J">Jiamian Wang</a>, <a href="https://arxiv.org/search/eess?searchtype=author&query=Li,+K">Kunpeng Li</a>, <a href="https://arxiv.org/search/eess?searchtype=author&query=Zhang,+Y">Yulun Zhang</a>, <a href="https://arxiv.org/search/eess?searchtype=author&query=Yuan,+X">Xin Yuan</a>, <a href="https://arxiv.org/search/eess?searchtype=author&query=Tao,+Z">Zhiqiang Tao</a></div> <div class='list-comments mathjax'><span class='descriptor'>Comments:</span> Accepted by TPAMI </div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Image and Video Processing (eess.IV)</span>; Computer Vision and Pattern Recognition (cs.CV) </div> <p class='mathjax'> Snapshot compressive imaging (SCI) surges as a novel way of capturing hyperspectral images. It operates an optical encoder to compress the 3D data into a 2D measurement and adopts a software decoder for the signal reconstruction. Recently, a representative SCI set-up of coded aperture snapshot compressive imager (CASSI) with Transformer reconstruction backend remarks high-fidelity sensing performance. 
However, dominant spatial and spectral attention designs show limitations in hyperspectral modeling. The spatial attention values describe the inter-pixel correlation but overlook the across-spectra variation within each pixel. The spectral attention size is unscalable to the token spatial size and thus bottlenecks information allocation. Besides, CASSI entangles the spatial and spectral information into a 2D measurement, placing a barrier for information disentanglement and modeling. In addition, CASSI blocks the light with a physical binary mask, yielding the masked data loss. To tackle above challenges, we propose a spatial-spectral (S2-) Transformer implemented by a paralleled attention design and a mask-aware learning strategy. Firstly, we systematically explore pros and cons of different spatial (-spectral) attention designs, based on which we find performing both attentions in parallel well disentangles and models the blended information. Secondly, the masked pixels induce higher prediction difficulty and should be treated differently from unmasked ones. We adaptively prioritize the loss penalty attributing to the mask structure by referring to the mask-encoded prediction as an uncertainty estimator. We theoretically discuss the distinct convergence tendencies between masked/unmasked regions of the proposed learning strategy. Extensive experiments demonstrate that on average, the results of the proposed method are superior over the state-of-the-art method. 
</p> </div> </dd> <dt> <a name='item47'>[47]</a> <a href ="/abs/2402.08027" title="Abstract" id="2402.08027"> arXiv:2402.08027 </a> (replaced) [<a href="/pdf/2402.08027" title="Download PDF" id="pdf-2402.08027" aria-labelledby="pdf-2402.08027">pdf</a>, <a href="https://arxiv.org/html/2402.08027v2" title="View HTML" id="html-2402.08027" aria-labelledby="html-2402.08027" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2402.08027" title="Other formats" id="oth-2402.08027" aria-labelledby="oth-2402.08027">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> On the Stability of Undesirable Equilibria in the Quadratic Program Framework for Safety-Critical Control </div> <div class='list-authors'><a href="https://arxiv.org/search/eess?searchtype=author&query=Reis,+M+F">Matheus F. Reis</a>, <a href="https://arxiv.org/search/eess?searchtype=author&query=Aguiar,+A+P">A. Pedro Aguiar</a></div> <div class='list-comments mathjax'><span class='descriptor'>Comments:</span> Submitted to IFAC Automatica. Under review </div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Systems and Control (eess.SY)</span> </div> <p class='mathjax'> Control Lyapunov functions (CLFs) and Control Barrier Functions (CBFs) have been used to develop provably safe controllers by means of quadratic programs (QPs). This framework guarantees safety in the form of trajectory invariance with respect to a given set, but it can introduce undesirable equilibrium points to the closed loop system, which can be asymptotically stable. In this work, we present a detailed study of the formation and stability of equilibrium points with the CLF-CBF-QP framework with multiple CBFs. In particular, we prove that undesirable equilibrium points occur for most systems, and their stability is dependent on the CLF and CBF geometrical properties. 
We introduce the concept of CLF-CBF compatibility for a system, regarding a CLF-CBF pair inducing no stable equilibrium points other than the CLF global minimum on the corresponding closed-loop dynamics. Sufficient conditions for CLF-CBF compatibility for LTI and drift-less full-rank systems with quadratic CLF and CBFs are derived, and we propose a novel control strategy to induce smooth changes in the CLF geometry at certain regions of the state space in order to satisfy the CLF-CBF compatibility conditions, aiming to achieve safety with respect to multiple safety objectives and quasi-global convergence of the trajectories towards the CLF minimum. Numeric simulations illustrate the applicability of the proposed method. </p> </div> </dd> <dt> <a name='item48'>[48]</a> <a href ="/abs/2402.09488" title="Abstract" id="2402.09488"> arXiv:2402.09488 </a> (replaced) [<a href="/pdf/2402.09488" title="Download PDF" id="pdf-2402.09488" aria-labelledby="pdf-2402.09488">pdf</a>, <a href="https://arxiv.org/html/2402.09488v2" title="View HTML" id="html-2402.09488" aria-labelledby="html-2402.09488" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2402.09488" title="Other formats" id="oth-2402.09488" aria-labelledby="oth-2402.09488">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Intelligent Agricultural Greenhouse Control System Based on Internet of Things and Machine Learning </div> <div class='list-authors'><a href="https://arxiv.org/search/eess?searchtype=author&query=Wang,+C">Cangqing Wang</a>, <a href="https://arxiv.org/search/eess?searchtype=author&query=Gong,+J">Jiangchuan Gong</a></div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Systems and Control (eess.SY)</span>; Machine Learning (cs.LG) </div> <p class='mathjax'> This study endeavors to conceptualize and execute a sophisticated agricultural greenhouse control system grounded in 
the amalgamation of the Internet of Things (IoT) and machine learning. Through meticulous monitoring of intrinsic environmental parameters within the greenhouse and the integration of machine learning algorithms, the conditions within the greenhouse are aptly modulated. The envisaged outcome is an enhancement in crop growth efficiency and yield, accompanied by a reduction in resource wastage. In the backdrop of escalating global population figures and the escalating exigencies of climate change, agriculture confronts unprecedented challenges. Conventional agricultural paradigms have proven inadequate in addressing the imperatives of food safety and production efficiency. Against this backdrop, greenhouse agriculture emerges as a viable solution, proffering a controlled milieu for crop cultivation to augment yields, refine quality, and diminish reliance on natural resources [b1]. Nevertheless, greenhouse agriculture contends with a gamut of challenges. Traditional greenhouse management strategies, often grounded in experiential knowledge and predefined rules, lack targeted personalized regulation, thereby resulting in resource inefficiencies. The exigencies of real-time monitoring and precise control of the greenhouse's internal environment gain paramount importance with the burgeoning scale of agriculture. To redress this challenge, the study introduces IoT technology and machine learning algorithms into greenhouse agriculture, aspiring to institute an intelligent agricultural greenhouse control system conducive to augmenting the efficiency and sustainability of agricultural production. 
</p> </div> </dd> <dt> <a name='item49'>[49]</a> <a href ="/abs/2403.05906" title="Abstract" id="2403.05906"> arXiv:2403.05906 </a> (replaced) [<a href="/pdf/2403.05906" title="Download PDF" id="pdf-2403.05906" aria-labelledby="pdf-2403.05906">pdf</a>, <a href="https://arxiv.org/html/2403.05906v2" title="View HTML" id="html-2403.05906" aria-labelledby="html-2403.05906" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2403.05906" title="Other formats" id="oth-2403.05906" aria-labelledby="oth-2403.05906">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Segmentation Guided Sparse Transformer for Under-Display Camera Image Restoration </div> <div class='list-authors'><a href="https://arxiv.org/search/eess?searchtype=author&query=Xue,+J">Jingyun Xue</a>, <a href="https://arxiv.org/search/eess?searchtype=author&query=Wang,+T">Tao Wang</a>, <a href="https://arxiv.org/search/eess?searchtype=author&query=Dai,+P">Pengwen Dai</a>, <a href="https://arxiv.org/search/eess?searchtype=author&query=Zhang,+K">Kaihao Zhang</a></div> <div class='list-comments mathjax'><span class='descriptor'>Comments:</span> 13 pages, 10 figures, conference or other essential info </div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Image and Video Processing (eess.IV)</span>; Computer Vision and Pattern Recognition (cs.CV) </div> <p class='mathjax'> Under-Display Camera (UDC) is an emerging technology that achieves full-screen display via hiding the camera under the display panel. However, the current implementation of UDC causes serious degradation. The incident light required for camera imaging undergoes attenuation and diffraction when passing through the display panel, leading to various artifacts in UDC imaging. 
Presently, the prevailing UDC image restoration methods predominantly utilize convolutional neural network architectures, whereas Transformer-based methods have exhibited superior performance in the majority of image restoration tasks. This is attributed to the Transformer's capability to sample global features for the local reconstruction of images, thereby achieving high-quality image restoration. In this paper, we observe that when using the Vision Transformer for UDC degraded image restoration, the global attention samples a large amount of redundant information and noise. Furthermore, compared to the ordinary Transformer employing dense attention, the Transformer utilizing sparse attention can alleviate the adverse impact of redundant information and noise. Building upon this discovery, we propose a Segmentation Guided Sparse Transformer method (SGSFormer) for the task of restoring high-quality images from UDC degraded images. Specifically, we utilize sparse self-attention to filter out redundant information and noise, directing the model's attention to focus on the features more relevant to the degraded regions in need of reconstruction. Moreover, we integrate the instance segmentation map as prior information to guide the sparse self-attention in filtering and focusing on the correct regions. 
</p> </div> </dd> <dt> <a name='item50'>[50]</a> <a href ="/abs/2404.15305" title="Abstract" id="2404.15305"> arXiv:2404.15305 </a> (replaced) [<a href="/pdf/2404.15305" title="Download PDF" id="pdf-2404.15305" aria-labelledby="pdf-2404.15305">pdf</a>, <a href="https://arxiv.org/html/2404.15305v2" title="View HTML" id="html-2404.15305" aria-labelledby="html-2404.15305" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2404.15305" title="Other formats" id="oth-2404.15305" aria-labelledby="oth-2404.15305">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> SelfReplay: Adapting Self-Supervised Sensory Models via Adaptive Meta-Task Replay </div> <div class='list-authors'><a href="https://arxiv.org/search/eess?searchtype=author&query=Yoon,+H">Hyungjun Yoon</a>, <a href="https://arxiv.org/search/eess?searchtype=author&query=Kwak,+J">Jaehyun Kwak</a>, <a href="https://arxiv.org/search/eess?searchtype=author&query=Tolera,+B+A">Biniyam Aschalew Tolera</a>, <a href="https://arxiv.org/search/eess?searchtype=author&query=Dai,+G">Gaole Dai</a>, <a href="https://arxiv.org/search/eess?searchtype=author&query=Li,+M">Mo Li</a>, <a href="https://arxiv.org/search/eess?searchtype=author&query=Gong,+T">Taesik Gong</a>, <a href="https://arxiv.org/search/eess?searchtype=author&query=Lee,+K">Kimin Lee</a>, <a href="https://arxiv.org/search/eess?searchtype=author&query=Lee,+S">Sung-Ju Lee</a></div> <div class='list-comments mathjax'><span class='descriptor'>Comments:</span> Accepted to the 23rd ACM Conference on Embedded Networked Sensor Systems (ACM SenSys 2025) </div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Signal Processing (eess.SP)</span>; Machine Learning (cs.LG) </div> <p class='mathjax'> Self-supervised learning has emerged as a method for utilizing massive unlabeled data for pre-training models, providing an effective feature extractor for 
various mobile sensing applications. However, when deployed to end-users, these models encounter significant domain shifts attributed to user diversity. We investigate the performance degradation that occurs when self-supervised models are fine-tuned in heterogeneous domains. To address the issue, we propose SelfReplay, a few-shot domain adaptation framework for personalizing self-supervised models. SelfReplay proposes self-supervised meta-learning for initial model pre-training, followed by a user-side model adaptation by replaying the self-supervision with user-specific data. This allows models to adjust their pre-trained representations to the user with only a few samples. Evaluation with four benchmarks demonstrates that SelfReplay outperforms existing baselines by an average F1-score of 8.8%p. Our on-device computational overhead analysis on a commodity off-the-shelf (COTS) smartphone shows that SelfReplay completes adaptation within an unobtrusive latency (in three minutes) with only a 9.54% memory consumption, demonstrating the computational efficiency of the proposed method. 
</p> </div> </dd> <dt> <a name='item51'>[51]</a> <a href ="/abs/2405.02741" title="Abstract" id="2405.02741"> arXiv:2405.02741 </a> (replaced) [<a href="/pdf/2405.02741" title="Download PDF" id="pdf-2405.02741" aria-labelledby="pdf-2405.02741">pdf</a>, <a href="/format/2405.02741" title="Other formats" id="oth-2405.02741" aria-labelledby="oth-2405.02741">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Activity Detection for Massive Random Access using Covariance-based Matching Pursuit </div> <div class='list-authors'><a href="https://arxiv.org/search/eess?searchtype=author&query=Marata,+L">Leatile Marata</a>, <a href="https://arxiv.org/search/eess?searchtype=author&query=Ollila,+E">Esa Ollila</a>, <a href="https://arxiv.org/search/eess?searchtype=author&query=Alves,+H">Hirley Alves</a></div> <div class='list-comments mathjax'><span class='descriptor'>Comments:</span> Under review with IEEE TVT </div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Signal Processing (eess.SP)</span> </div> <p class='mathjax'> The Internet of Things paradigm heavily relies on a network of a massive number of machine-type devices (MTDs) that monitor various phenomena. Consequently, MTDs are randomly activated at different times whenever a change occurs. In general, fewer MTDs are simultaneously activated across the network, resembling targeted sampling in compressed sensing. Therefore, signal recovery in machine-type communications is addressed through joint user activity detection and channel estimation algorithms built using compressed sensing theory. However, most of these algorithms follow a two-stage procedure in which a channel is first estimated and later mapped to find active users. This approach is inefficient because the estimated channel information is subsequently discarded. 
To overcome this limitation, we introduce a novel covariance-learning matching pursuit (CL-MP) algorithm that bypasses explicit channel estimation. Instead, it focuses on estimating the indices of the active users greedily. Simulation results presented in terms of probability of miss detection, exact recovery rate, and computational complexity validate the proposed technique's superior performance and efficiency. </p> </div> </dd> <dt> <a name='item52'>[52]</a> <a href ="/abs/2407.03661" title="Abstract" id="2407.03661"> arXiv:2407.03661 </a> (replaced) [<a href="/pdf/2407.03661" title="Download PDF" id="pdf-2407.03661" aria-labelledby="pdf-2407.03661">pdf</a>, <a href="https://arxiv.org/html/2407.03661v3" title="View HTML" id="html-2407.03661" aria-labelledby="html-2407.03661" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2407.03661" title="Other formats" id="oth-2407.03661" aria-labelledby="oth-2407.03661">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Where's That Voice Coming? Continual Learning for Sound Source Localization </div> <div class='list-authors'><a href="https://arxiv.org/search/eess?searchtype=author&query=Xiao,+Y">Yang Xiao</a>, <a href="https://arxiv.org/search/eess?searchtype=author&query=Das,+R+K">Rohan Kumar Das</a></div> <div class='list-comments mathjax'><span class='descriptor'>Comments:</span> Accepted to ICME 2025 </div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Audio and Speech Processing (eess.AS)</span>; Sound (cs.SD) </div> <p class='mathjax'> Sound source localization (SSL) is essential for many speech-processing applications. Deep learning models have achieved high performance, but often fail when the training and inference environments differ. Adapting SSL models to dynamic acoustic conditions faces a major challenge: catastrophic forgetting. 
In this work, we propose an exemplar-free continual learning strategy for SSL (CL-SSL) to address such a forgetting phenomenon. CL-SSL applies task-specific sub-networks to adapt across diverse acoustic environments while retaining previously learned knowledge. It also uses a scaling mechanism to limit parameter growth, ensuring consistent performance across incremental tasks. We evaluated CL-SSL on simulated data with varying microphone distances and real-world data with different noise levels. The results demonstrate CL-SSL's ability to maintain high accuracy with minimal parameter increase, offering an efficient solution for SSL applications. </p> </div> </dd> <dt> <a name='item53'>[53]</a> <a href ="/abs/2409.00101" title="Abstract" id="2409.00101"> arXiv:2409.00101 </a> (replaced) [<a href="/pdf/2409.00101" title="Download PDF" id="pdf-2409.00101" aria-labelledby="pdf-2409.00101">pdf</a>, <a href="https://arxiv.org/html/2409.00101v3" title="View HTML" id="html-2409.00101" aria-labelledby="html-2409.00101" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2409.00101" title="Other formats" id="oth-2409.00101" aria-labelledby="oth-2409.00101">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> NeuroLM: A Universal Multi-task Foundation Model for Bridging the Gap between Language and EEG Signals </div> <div class='list-authors'><a href="https://arxiv.org/search/eess?searchtype=author&query=Jiang,+W">Wei-Bang Jiang</a>, <a href="https://arxiv.org/search/eess?searchtype=author&query=Wang,+Y">Yansen Wang</a>, <a href="https://arxiv.org/search/eess?searchtype=author&query=Lu,+B">Bao-Liang Lu</a>, <a href="https://arxiv.org/search/eess?searchtype=author&query=Li,+D">Dongsheng Li</a></div> <div class='list-comments mathjax'><span class='descriptor'>Comments:</span> The Thirteenth International Conference on Learning Representations </div> <div class='list-journal-ref'><span 
class='descriptor'>Journal-ref:</span> The Thirteenth International Conference on Learning Representations, 2025 </div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Signal Processing (eess.SP)</span>; Human-Computer Interaction (cs.HC); Machine Learning (cs.LG) </div> <p class='mathjax'> Recent advancements for large-scale pre-training with neural signals such as electroencephalogram (EEG) have shown promising results, significantly boosting the development of brain-computer interfaces (BCIs) and healthcare. However, these pre-trained models often require full fine-tuning on each downstream task to achieve substantial improvements, limiting their versatility and usability, and leading to considerable resource wastage. To tackle these challenges, we propose NeuroLM, the first multi-task foundation model that leverages the capabilities of Large Language Models (LLMs) by regarding EEG signals as a foreign language, endowing the model with multi-task learning and inference capabilities. Our approach begins with learning a text-aligned neural tokenizer through vector-quantized temporal-frequency prediction, which encodes EEG signals into discrete neural tokens. These EEG tokens, generated by the frozen vector-quantized (VQ) encoder, are then fed into an LLM that learns causal EEG information via multi-channel autoregression. Consequently, NeuroLM can understand both EEG and language modalities. Finally, multi-task instruction tuning adapts NeuroLM to various downstream tasks. We are the first to demonstrate that, by specific incorporation with LLMs, NeuroLM unifies diverse EEG tasks within a single model through instruction tuning. The largest variant NeuroLM-XL has record-breaking 1.7B parameters for EEG signal processing, and is pre-trained on a large-scale corpus comprising approximately 25,000-hour EEG data. 
When evaluated on six diverse downstream datasets, NeuroLM showcases the huge potential of this multi-task learning paradigm. </p> </div> </dd> <dt> <a name='item54'>[54]</a> <a href ="/abs/2409.07771" title="Abstract" id="2409.07771"> arXiv:2409.07771 </a> (replaced) [<a href="/pdf/2409.07771" title="Download PDF" id="pdf-2409.07771" aria-labelledby="pdf-2409.07771">pdf</a>, <a href="https://arxiv.org/html/2409.07771v2" title="View HTML" id="html-2409.07771" aria-labelledby="html-2409.07771" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2409.07771" title="Other formats" id="oth-2409.07771" aria-labelledby="oth-2409.07771">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Polarforming for Wireless Communications: Modeling and Performance Analysis </div> <div class='list-authors'><a href="https://arxiv.org/search/eess?searchtype=author&query=Zhou,+Z">Zijian Zhou</a>, <a href="https://arxiv.org/search/eess?searchtype=author&query=Ding,+J">Jingze Ding</a>, <a href="https://arxiv.org/search/eess?searchtype=author&query=Wang,+C">Chenbo Wang</a>, <a href="https://arxiv.org/search/eess?searchtype=author&query=Jiao,+B">Bingli Jiao</a>, <a href="https://arxiv.org/search/eess?searchtype=author&query=Zhang,+R">Rui Zhang</a></div> <div class='list-comments mathjax'><span class='descriptor'>Comments:</span> 13 pages, 11 figures </div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Signal Processing (eess.SP)</span> </div> <p class='mathjax'> This paper presents, for the first time, the concept of polarforming for wireless communications. Polarforming refers to a novel technique that enables the polarization of an antenna to shape into a desired polarization state for aligning with the polarization of an electromagnetic (EM) wave. 
It can fully leverage polarization diversity to enhance the performance of wireless communication systems through polarization matching. To implement polarforming, we propose a new paradigm of phase shifter (PS)-based polarization-reconfigurable antennas (PRAs) that can form linear, circular, and general elliptical polarizations by phase shift control. To further demonstrate the benefits of polarforming, we investigate a PRA-aided wireless communication system equipped with tunable polarization of antennas. We characterize the multiple-input multiple-output (MIMO) channel capacity of the considered system as a function of the phase shifts of PS-based PRAs. We also provide a detailed polarforming interpretation under the single-input single-output (SISO) scenario and theoretically show how polarforming differs from the conventional (analog) beamforming based on PSs. Moreover, we develop an alternating optimization approach to maximize the channel capacity for the systems with single-antenna transmitter/receiver. Based on the water-filling principle, we also derive an upper bound on the MIMO channel capacity with PS-based PRAs and then maximize this capacity bound by optimizing the phase shifts through alternating optimization. Finally, comprehensive simulation results are presented, which not only validate the effectiveness of polarforming in combating channel depolarization but also exhibit substantial performance improvements over conventional systems. 
</p> </div> </dd> <dt> <a name='item55'>[55]</a> <a href ="/abs/2410.02555" title="Abstract" id="2410.02555"> arXiv:2410.02555 </a> (replaced) [<a href="/pdf/2410.02555" title="Download PDF" id="pdf-2410.02555" aria-labelledby="pdf-2410.02555">pdf</a>, <a href="https://arxiv.org/html/2410.02555v2" title="View HTML" id="html-2410.02555" aria-labelledby="html-2410.02555" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2410.02555" title="Other formats" id="oth-2410.02555" aria-labelledby="oth-2410.02555">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Toward Neuronal Implementations of Delayed Optimal Control </div> <div class='list-authors'><a href="https://arxiv.org/search/eess?searchtype=author&query=Li,+J+S">Jing Shuang Li</a></div> <div class='list-comments mathjax'><span class='descriptor'>Comments:</span> to appear at 2025 IEEE American Control Conference (ACC) </div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Systems and Control (eess.SY)</span>; Neurons and Cognition (q-bio.NC) </div> <p class='mathjax'> Animal sensorimotor behavior is frequently modeled using optimal controllers. However, it is unclear how the neural circuits within the animal's nervous system implement optimal controller-like behavior. In this work, we study the question of implementing a delayed linear quadratic regulator with linear dynamical "neurons" on a muscle model. We show that for any second-order controller, there are three minimal neural circuit configurations that implement the same controller. Furthermore, the firing rate characteristics of each circuit can vary drastically, even as the overall controller behavior is preserved. Along the way, we introduce concepts that bridge controller realizations to neural implementations that are compatible with known neuronal delay structures. 
</p> </div> </dd> <dt> <a name='item56'>[56]</a> <a href ="/abs/2410.02957" title="Abstract" id="2410.02957"> arXiv:2410.02957 </a> (replaced) [<a href="/pdf/2410.02957" title="Download PDF" id="pdf-2410.02957" aria-labelledby="pdf-2410.02957">pdf</a>, <a href="https://arxiv.org/html/2410.02957v2" title="View HTML" id="html-2410.02957" aria-labelledby="html-2410.02957" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2410.02957" title="Other formats" id="oth-2410.02957" aria-labelledby="oth-2410.02957">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Human Balancing on a Log: A Switched Multi-Layer Controller </div> <div class='list-authors'><a href="https://arxiv.org/search/eess?searchtype=author&query=Zhao,+J">Jiayi Zhao</a>, <a href="https://arxiv.org/search/eess?searchtype=author&query=Yang,+M">Mo Yang</a>, <a href="https://arxiv.org/search/eess?searchtype=author&query=Li,+J+S">Jing Shuang Li</a></div> <div class='list-comments mathjax'><span class='descriptor'>Comments:</span> to appear at 2025 IEEE American Control Conference (ACC) </div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Systems and Control (eess.SY)</span> </div> <p class='mathjax'> We study the task of balancing a human on a log that is fixed in place. Balancing on a log is substantially more challenging than balancing on a flat surface due to increased instability -- nonetheless, we are able to balance by composing simple (e.g., PID, LQR) controllers in a bio-inspired switched multi-layer configuration. The controller consists of an upper-layer LQR planner (akin to the central nervous system) that coordinates ankle and hip torques, and lower-layer PID trackers (akin to local motor units) that follow this plan subject to nonlinear dynamics. The controller switches between three operational modes depending on the state of the human. 
The efficacy of the controller is verified in simulation, where our controller is able to stabilize the human for a variety of initial conditions and disturbances. We also introduce a controller that outputs muscle activations to perform the same balancing task. </p> </div> </dd> <dt> <a name='item57'>[57]</a> <a href ="/abs/2411.03582" title="Abstract" id="2411.03582"> arXiv:2411.03582 </a> (replaced) [<a href="/pdf/2411.03582" title="Download PDF" id="pdf-2411.03582" aria-labelledby="pdf-2411.03582">pdf</a>, <a href="https://arxiv.org/html/2411.03582v2" title="View HTML" id="html-2411.03582" aria-labelledby="html-2411.03582" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2411.03582" title="Other formats" id="oth-2411.03582" aria-labelledby="oth-2411.03582">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Privacy Preserving Mechanisms for Coordinating Airspace Usage in Advanced Air Mobility </div> <div class='list-authors'><a href="https://arxiv.org/search/eess?searchtype=author&query=Maheshwari,+C">Chinmay Maheshwari</a>, <a href="https://arxiv.org/search/eess?searchtype=author&query=Mendoza,+M+G">Maria G. Mendoza</a>, <a href="https://arxiv.org/search/eess?searchtype=author&query=Tuck,+V+M">Victoria Marie Tuck</a>, <a href="https://arxiv.org/search/eess?searchtype=author&query=Su,+P">Pan-Yang Su</a>, <a href="https://arxiv.org/search/eess?searchtype=author&query=Qin,+V+L">Victor L. Qin</a>, <a href="https://arxiv.org/search/eess?searchtype=author&query=Seshia,+S+A">Sanjit A. 
Seshia</a>, <a href="https://arxiv.org/search/eess?searchtype=author&query=Balakrishnan,+H">Hamsa Balakrishnan</a>, <a href="https://arxiv.org/search/eess?searchtype=author&query=Sastry,+S">Shankar Sastry</a></div> <div class='list-comments mathjax'><span class='descriptor'>Comments:</span> 31 pages, 7 figures, 3 tables </div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Systems and Control (eess.SY)</span> </div> <p class='mathjax'> Advanced Air Mobility (AAM) operations are expected to transform air transportation while challenging current air traffic management practices. By introducing a novel market-based mechanism, we address the problem of on-demand allocation of capacity-constrained airspace to AAM vehicles with heterogeneous and private valuations. We model airspace and air infrastructure as a collection of contiguous regions with constraints on the number of vehicles that simultaneously enter, stay, or exit each region. Vehicles request access to the airspace with trajectories spanning multiple regions at different times. We use the graph structure of our airspace model to formulate the allocation problem as a path allocation problem on a time-extended graph. To ensure the cost information of AAM vehicles remains private, we introduce a novel mechanism that allocates each vehicle a budget of "air-credits" and anonymously charges prices for traversing the edges of the time-extended graph. We seek to compute a competitive equilibrium that ensures that: (i) capacity constraints are satisfied, (ii) a strictly positive resource price implies that the sector capacity is fully utilized, and (iii) the allocation is integral and optimal for each AAM vehicle given current prices, without requiring access to individual vehicle utilities. However, a competitive equilibrium with integral allocations may not always exist. 
We provide sufficient conditions for the existence and computation of a fractional-competitive equilibrium, where allocations can be fractional. Building on these theoretical insights, we propose a distributed, iterative, two-step algorithm that: 1) computes a fractional competitive equilibrium, and 2) derives an integral allocation from this equilibrium. We validate the effectiveness of our approach in allocating trajectories for two emerging urban air mobility services: drone delivery and air taxis. </p> </div> </dd> <dt> <a name='item58'>[58]</a> <a href ="/abs/2411.15144" title="Abstract" id="2411.15144"> arXiv:2411.15144 </a> (replaced) [<a href="/pdf/2411.15144" title="Download PDF" id="pdf-2411.15144" aria-labelledby="pdf-2411.15144">pdf</a>, <a href="/format/2411.15144" title="Other formats" id="oth-2411.15144" aria-labelledby="oth-2411.15144">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Physically Parameterized Differentiable MUSIC for DoA Estimation with Uncalibrated Arrays </div> <div class='list-authors'><a href="https://arxiv.org/search/eess?searchtype=author&query=Chatelier,+B">Baptiste Chatelier</a> (INSA Rennes, IETR, MERCE-France), <a href="https://arxiv.org/search/eess?searchtype=author&query=Mateos-Ramos,+J+M">José Miguel Mateos-Ramos</a>, <a href="https://arxiv.org/search/eess?searchtype=author&query=Corlay,+V">Vincent Corlay</a> (MERCE-France), <a href="https://arxiv.org/search/eess?searchtype=author&query=H%C3%A4ger,+C">Christian Häger</a>, <a href="https://arxiv.org/search/eess?searchtype=author&query=Crussi%C3%A8re,+M">Matthieu Crussière</a> (INSA Rennes, IETR), <a href="https://arxiv.org/search/eess?searchtype=author&query=Wymeersch,+H">Henk Wymeersch</a>, <a href="https://arxiv.org/search/eess?searchtype=author&query=Magoarou,+L+L">Luc Le Magoarou</a> (INSA Rennes, IETR)</div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span 
class="primary-subject">Signal Processing (eess.SP)</span>; Artificial Intelligence (cs.AI); Information Theory (cs.IT); Machine Learning (cs.LG) </div> <p class='mathjax'> Direction of arrival (DoA) estimation is a common sensing problem in radar, sonar, audio, and wireless communication systems. It has gained renewed importance with the advent of the integrated sensing and communication paradigm. To fully exploit the potential of such sensing systems, it is crucial to take into account potential hardware impairments that can negatively impact the obtained performance. This study introduces a joint DoA estimation and hardware impairment learning scheme following a model-based approach. Specifically, a differentiable version of the multiple signal classification (MUSIC) algorithm is derived, allowing efficient learning of the considered impairments. The proposed approach supports both supervised and unsupervised learning strategies, showcasing its practical potential. Simulation results indicate that the proposed method successfully learns significant inaccuracies in both antenna locations and complex gains. Additionally, the proposed method outperforms the classical MUSIC algorithm in the DoA estimation task. 
</p> </div> </dd> <dt> <a name='item59'>[59]</a> <a href ="/abs/2502.03169" title="Abstract" id="2502.03169"> arXiv:2502.03169 </a> (replaced) [<a href="/pdf/2502.03169" title="Download PDF" id="pdf-2502.03169" aria-labelledby="pdf-2502.03169">pdf</a>, <a href="https://arxiv.org/html/2502.03169v3" title="View HTML" id="html-2502.03169" aria-labelledby="html-2502.03169" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2502.03169" title="Other formats" id="oth-2502.03169" aria-labelledby="oth-2502.03169">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Antenna Position Optimization for Movable Antenna-Empowered Near-Field Sensing </div> <div class='list-authors'><a href="https://arxiv.org/search/eess?searchtype=author&query=Wang,+Y">Yushen Wang</a>, <a href="https://arxiv.org/search/eess?searchtype=author&query=Mei,+W">Weidong Mei</a>, <a href="https://arxiv.org/search/eess?searchtype=author&query=Wei,+X">Xin Wei</a>, <a href="https://arxiv.org/search/eess?searchtype=author&query=Ning,+B">Boyu Ning</a>, <a href="https://arxiv.org/search/eess?searchtype=author&query=Chen,+Z">Zhi Chen</a></div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Signal Processing (eess.SP)</span> </div> <p class='mathjax'> Movable antennas (MAs) show great promise for enhancing the sensing capabilities of future sixth-generation (6G) networks. With the growing prevalence of near-field propagation at ultra-high frequencies, this paper focuses on the application of MAs for near-field sensing to jointly estimate the angle and distance information of a target. First, to gain essential insights into MA-enhanced near-field sensing, we investigate two simplified cases with only the spatial angle-of-arrival (AoA) or distance estimation, respectively, assuming that the other information is already known. 
We derive the worst-case Cramer-Rao bounds (CRBs) on the mean square errors (MSEs) of the AoA estimation and the distance estimation via the multiple signal classification (MUSIC) algorithm in these two cases. Then, we jointly optimize the positions of the MAs within a linear array to minimize these CRBs and derive their closed-form solutions, which yield an identical array geometry to MA-aided far-field sensing. Furthermore, we proceed to the more challenging case with the joint AoA and distance estimation and derive the worst-case CRB under the two-dimensional (2D) MUSIC algorithm. The corresponding CRB minimization problem is efficiently solved by adopting a discrete sampling-based approach. Numerical results demonstrate that the proposed MA-enhanced near-field sensing significantly outperforms conventional sensing with fixed-position antennas (FPAs). Moreover, the joint angle and distance estimation results in a different array geometry from that in the individual estimation of angle or distance. 
</p> </div> </dd> <dt> <a name='item60'>[60]</a> <a href ="/abs/2502.12990" title="Abstract" id="2502.12990"> arXiv:2502.12990 </a> (replaced) [<a href="/pdf/2502.12990" title="Download PDF" id="pdf-2502.12990" aria-labelledby="pdf-2502.12990">pdf</a>, <a href="https://arxiv.org/html/2502.12990v3" title="View HTML" id="html-2502.12990" aria-labelledby="html-2502.12990" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2502.12990" title="Other formats" id="oth-2502.12990" aria-labelledby="oth-2502.12990">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Artificial Intelligence-derived Vascular Age from Photoplethysmography: A Novel Digital Biomarker for Cardiovascular Health </div> <div class='list-authors'><a href="https://arxiv.org/search/eess?searchtype=author&query=Nie,+G">Guangkun Nie</a>, <a href="https://arxiv.org/search/eess?searchtype=author&query=Zhao,+Q">Qinghao Zhao</a>, <a href="https://arxiv.org/search/eess?searchtype=author&query=Tang,+G">Gongzheng Tang</a>, <a href="https://arxiv.org/search/eess?searchtype=author&query=Li,+Y">Yaxin Li</a>, <a href="https://arxiv.org/search/eess?searchtype=author&query=Hong,+S">Shenda Hong</a></div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Signal Processing (eess.SP)</span> </div> <p class='mathjax'> With the increasing availability of wearable devices, photoplethysmography (PPG) has emerged as a promising non-invasive tool for monitoring human hemodynamics. We propose a deep learning framework to estimate vascular age (AI-vascular age) from PPG signals, incorporating a distribution-aware loss to address biases caused by imbalanced data. The model was developed using data from the UK Biobank (UKB), with 98,672 participants in the development cohort and 113,559 participants (144,683 data pairs) for clinical evaluation. 
After adjusting for key confounders, individuals with a vascular age gap (AI-vascular age minus calendar age) exceeding 9 years had a significantly higher risk of major adverse cardiovascular and cerebrovascular events (MACCE) (HR = 2.37, p &lt; 0.005) and secondary outcomes, including diabetes (HR = 2.69, p &lt; 0.005), hypertension (HR = 2.88, p &lt; 0.005), coronary heart disease (HR = 2.20, p &lt; 0.005), heart failure (HR = 2.15, p &lt; 0.005), myocardial infarction (HR = 2.51, p &lt; 0.005), stroke (HR = 2.55, p &lt; 0.005), and all-cause mortality (HR = 2.51, p &lt; 0.005). Conversely, participants with a vascular age gap below -9 years exhibited a significantly lower incidence of these outcomes. We further evaluated the longitudinal applicability of AI-vascular age using serial PPG data from the UKB, demonstrating its value in risk stratification by leveraging AI-vascular age at two distinct time points to predict future MACCE incidence. External validation was performed on a MIMIC-III-derived cohort (n = 2,343), where each one-year increase in vascular age gap was significantly associated with elevated in-hospital mortality risk (OR = 1.02, p &lt; 0.005). In conclusion, our study establishes AI-vascular age as a novel, non-invasive digital biomarker for cardiovascular health assessment. 
</p> </div> </dd> <dt> <a name='item61'>[61]</a> <a href ="/abs/2503.13996" title="Abstract" id="2503.13996"> arXiv:2503.13996 </a> (replaced) [<a href="/pdf/2503.13996" title="Download PDF" id="pdf-2503.13996" aria-labelledby="pdf-2503.13996">pdf</a>, <a href="https://arxiv.org/html/2503.13996v2" title="View HTML" id="html-2503.13996" aria-labelledby="html-2503.13996" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2503.13996" title="Other formats" id="oth-2503.13996" aria-labelledby="oth-2503.13996">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Robust Safety Critical Control Under Multiple State and Input Constraints: Volume Control Barrier Function Method </div> <div class='list-authors'><a href="https://arxiv.org/search/eess?searchtype=author&query=Dong,+J">Jinyang Dong</a>, <a href="https://arxiv.org/search/eess?searchtype=author&query=Wu,+S">Shizhen Wu</a>, <a href="https://arxiv.org/search/eess?searchtype=author&query=Liu,+R">Rui Liu</a>, <a href="https://arxiv.org/search/eess?searchtype=author&query=Liang,+X">Xiao Liang</a>, <a href="https://arxiv.org/search/eess?searchtype=author&query=Lu,+B">Biao Lu</a>, <a href="https://arxiv.org/search/eess?searchtype=author&query=Fang,+Y">Yongchun Fang</a></div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Systems and Control (eess.SY)</span>; Robotics (cs.RO) </div> <p class='mathjax'> In this paper, the safety-critical control problem for uncertain systems under multiple control barrier function (CBF) constraints and input constraints is investigated. A novel framework is proposed to generate a safety filter that minimizes changes to reference inputs when safety risks arise, ensuring a balance between safety and performance. 
A nonlinear disturbance observer (DOB) based on the robust integral of the sign of the error (RISE) is used to estimate system uncertainties, ensuring that the estimation error converges to zero exponentially. This error bound is integrated into the safety-critical controller to reduce conservativeness while ensuring safety. To further address the challenges arising from multiple CBF and input constraints, a novel Volume CBF (VCBF) is proposed by analyzing the feasible space of the quadratic programming (QP) problem. % ensuring solution feasibility by keeping the volume as a positive value. To ensure that the feasible space does not vanish under disturbances, a DOB-VCBF-based method is introduced, ensuring system safety while maintaining the feasibility of the resulting QP. Subsequently, several groups of simulation and experimental results are provided to validate the effectiveness of the proposed controller. </p> </div> </dd> <dt> <a name='item62'>[62]</a> <a href ="/abs/2503.14523" title="Abstract" id="2503.14523"> arXiv:2503.14523 </a> (replaced) [<a href="/pdf/2503.14523" title="Download PDF" id="pdf-2503.14523" aria-labelledby="pdf-2503.14523">pdf</a>, <a href="https://arxiv.org/html/2503.14523v2" title="View HTML" id="html-2503.14523" aria-labelledby="html-2503.14523" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2503.14523" title="Other formats" id="oth-2503.14523" aria-labelledby="oth-2503.14523">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> SDF-TopoNet: A Two-Stage Framework for Tubular Structure Segmentation via SDF Pre-training and Topology-Aware Fine-Tuning </div> <div class='list-authors'><a href="https://arxiv.org/search/eess?searchtype=author&query=Wu,+S">Siyi Wu</a>, <a href="https://arxiv.org/search/eess?searchtype=author&query=Zhao,+L">Leyi Zhao</a>, <a href="https://arxiv.org/search/eess?searchtype=author&query=Ma,+H">Haotian Ma</a>, <a 
href="https://arxiv.org/search/eess?searchtype=author&query=Song,+X">Xinyuan Song</a></div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Image and Video Processing (eess.IV)</span>; Computer Vision and Pattern Recognition (cs.CV) </div> <p class='mathjax'> Accurate segmentation of tubular and curvilinear structures, such as blood vessels, neurons, and road networks, is crucial in various applications. A key challenge is ensuring topological correctness while maintaining computational efficiency. Existing approaches often employ topological loss functions based on persistent homology, such as Betti error, to enforce structural consistency. However, these methods suffer from high computational costs and are insensitive to pixel-level accuracy, often requiring additional loss terms like Dice or MSE to compensate. To address these limitations, we propose <b>SDF-TopoNet</b>, an improved topology-aware segmentation framework that enhances both segmentation accuracy and training efficiency. Our approach introduces a novel two-stage training strategy. In the pre-training phase, we utilize the signed distance function (SDF) as an auxiliary learning target, allowing the model to encode topological information without directly relying on computationally expensive topological loss functions. In the fine-tuning phase, we incorporate a dynamic adapter alongside a refined topological loss to ensure topological correctness while mitigating overfitting and computational overhead. We evaluate our method on five benchmark datasets. Experimental results demonstrate that SDF-TopoNet outperforms existing methods in both topological accuracy and quantitative segmentation metrics, while significantly reducing training complexity. 
</p> </div> </dd> <dt> <a name='item63'>[63]</a> <a href ="/abs/2503.14538" title="Abstract" id="2503.14538"> arXiv:2503.14538 </a> (replaced) [<a href="/pdf/2503.14538" title="Download PDF" id="pdf-2503.14538" aria-labelledby="pdf-2503.14538">pdf</a>, <a href="https://arxiv.org/html/2503.14538v2" title="View HTML" id="html-2503.14538" aria-labelledby="html-2503.14538" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2503.14538" title="Other formats" id="oth-2503.14538" aria-labelledby="oth-2503.14538">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Vision-Language Models for Acute Tuberculosis Diagnosis: A Multimodal Approach Combining Imaging and Clinical Data </div> <div class='list-authors'><a href="https://arxiv.org/search/eess?searchtype=author&query=Ganapthy,+A">Ananya Ganapthy</a>, <a href="https://arxiv.org/search/eess?searchtype=author&query=Shastry,+P">Praveen Shastry</a>, <a href="https://arxiv.org/search/eess?searchtype=author&query=Kumarasami,+N">Naveen Kumarasami</a>, <a href="https://arxiv.org/search/eess?searchtype=author&query=D,+A">Anandakumar D</a>, <a href="https://arxiv.org/search/eess?searchtype=author&query=R,+K">Keerthana R</a>, <a href="https://arxiv.org/search/eess?searchtype=author&query=M,+M">Mounigasri M</a>, <a href="https://arxiv.org/search/eess?searchtype=author&query=M,+V">Varshinipriya M</a>, <a href="https://arxiv.org/search/eess?searchtype=author&query=Venkatesh,+K+P">Kishore Prasath Venkatesh</a>, <a href="https://arxiv.org/search/eess?searchtype=author&query=Subramanian,+B">Bargava Subramanian</a>, <a href="https://arxiv.org/search/eess?searchtype=author&query=Sivasailam,+K">Kalyan Sivasailam</a></div> <div class='list-comments mathjax'><span class='descriptor'>Comments:</span> 11 pages, 3 figures </div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Image and Video Processing 
(eess.IV)</span>; Artificial Intelligence (cs.AI); Computer Vision and Pattern Recognition (cs.CV); Machine Learning (cs.LG) </div> <p class='mathjax'> Background: This study introduces a Vision-Language Model (VLM) leveraging SIGLIP and Gemma-3b architectures for automated acute tuberculosis (TB) screening. By integrating chest X-ray images and clinical notes, the model aims to enhance diagnostic accuracy and efficiency, particularly in resource-limited settings. <br>Methods: The VLM combines visual data from chest X-rays with clinical context to generate detailed, context-aware diagnostic reports. The architecture employs SIGLIP for visual encoding and Gemma-3b for decoding, ensuring effective representation of acute TB-specific pathologies and clinical insights. <br>Results: Key acute TB pathologies, including consolidation, cavities, and nodules, were detected with high precision (97%) and recall (96%). The model demonstrated strong spatial localization capabilities and robustness in distinguishing TB-positive cases, making it a reliable tool for acute TB diagnosis. <br>Conclusion: The multimodal capability of the VLM reduces reliance on radiologists, providing a scalable solution for acute TB screening. Future work will focus on improving the detection of subtle pathologies and addressing dataset biases to enhance its generalizability and application in diverse global healthcare settings. 
</p> </div> </dd> <dt> <a name='item64'>[64]</a> <a href ="/abs/2211.14312" title="Abstract" id="2211.14312"> arXiv:2211.14312 </a> (replaced) [<a href="/pdf/2211.14312" title="Download PDF" id="pdf-2211.14312" aria-labelledby="pdf-2211.14312">pdf</a>, <a href="https://arxiv.org/html/2211.14312v4" title="View HTML" id="html-2211.14312" aria-labelledby="html-2211.14312" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2211.14312" title="Other formats" id="oth-2211.14312" aria-labelledby="oth-2211.14312">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Karyotype AI for Precision Oncology </div> <div class='list-authors'><a href="https://arxiv.org/search/q-bio?searchtype=author&query=Shamsi,+Z">Zahra Shamsi</a>, <a href="https://arxiv.org/search/q-bio?searchtype=author&query=Bryant,+D">Drew Bryant</a>, <a href="https://arxiv.org/search/q-bio?searchtype=author&query=Wilson,+J">Jacob Wilson</a>, <a href="https://arxiv.org/search/q-bio?searchtype=author&query=Qu,+X">Xiaoyu Qu</a>, <a href="https://arxiv.org/search/q-bio?searchtype=author&query=Dubey,+A">Avinava Dubey</a>, <a href="https://arxiv.org/search/q-bio?searchtype=author&query=Kothari,+K">Konik Kothari</a>, <a href="https://arxiv.org/search/q-bio?searchtype=author&query=Dehghani,+M">Mostafa Dehghani</a>, <a href="https://arxiv.org/search/q-bio?searchtype=author&query=Chavarha,+M">Mariya Chavarha</a>, <a href="https://arxiv.org/search/q-bio?searchtype=author&query=Likhosherstov,+V">Valerii Likhosherstov</a>, <a href="https://arxiv.org/search/q-bio?searchtype=author&query=Williams,+B">Brian Williams</a>, <a href="https://arxiv.org/search/q-bio?searchtype=author&query=Frumkin,+M">Michael Frumkin</a>, <a href="https://arxiv.org/search/q-bio?searchtype=author&query=Appelbaum,+F">Fred Appelbaum</a>, <a href="https://arxiv.org/search/q-bio?searchtype=author&query=Choromanski,+K">Krzysztof Choromanski</a>, <a 
href="https://arxiv.org/search/q-bio?searchtype=author&query=Bashir,+A">Ali Bashir</a>, <a href="https://arxiv.org/search/q-bio?searchtype=author&query=Fang,+M">Min Fang</a></div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Quantitative Methods (q-bio.QM)</span>; Computer Vision and Pattern Recognition (cs.CV); Machine Learning (cs.LG); Image and Video Processing (eess.IV) </div> <p class='mathjax'> We present a machine learning method capable of accurately detecting chromosome abnormalities that cause blood cancers directly from microscope images of the metaphase stage of cell division. The pipeline is built on a series of fine-tuned Vision Transformers. Current state of the art (and standard clinical practice) requires expensive, manual expert analysis, whereas our pipeline takes only 15 seconds per metaphase image. Using a novel pretraining-finetuning strategy to mitigate the challenge of data scarcity, we achieve a high precision-recall score of 94% AUC for the clinically significant del(5q) and t(9;22) anomalies. Our method also unlocks zero-shot detection of rare aberrations based on model latent embeddings. The ability to quickly, accurately, and scalably diagnose genetic abnormalities directly from metaphase images could transform karyotyping practice and improve patient outcomes. We will make code publicly available. 
</p> </div> </dd> <dt> <a name='item65'>[65]</a> <a href ="/abs/2401.10288" title="Abstract" id="2401.10288"> arXiv:2401.10288 </a> (replaced) [<a href="/pdf/2401.10288" title="Download PDF" id="pdf-2401.10288" aria-labelledby="pdf-2401.10288">pdf</a>, <a href="https://arxiv.org/html/2401.10288v2" title="View HTML" id="html-2401.10288" aria-labelledby="html-2401.10288" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2401.10288" title="Other formats" id="oth-2401.10288" aria-labelledby="oth-2401.10288">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Self-supervised New Activity Detection in Sensor-based Smart Environments </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Kim,+H">Hyunju Kim</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Lee,+D">Dongman Lee</a></div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Machine Learning (cs.LG)</span>; Signal Processing (eess.SP) </div> <p class='mathjax'> With the rapid advancement of ubiquitous computing technology, human activity analysis based on time series data from a diverse range of sensors enables the delivery of more intelligent services. Despite the importance of exploring new activities in real-world scenarios, existing human activity recognition studies generally rely on predefined known activities and often overlook detecting new patterns (novelties) that have not been previously observed during training. Novelty detection in human activities becomes even more challenging due to (1) diversity of patterns within the same known activity, (2) shared patterns between known and new activities, and (3) differences in sensor properties of each activity dataset. We introduce CLAN, a two-tower model that leverages Contrastive Learning with diverse data Augmentation for New activity detection in sensor-based environments. 
CLAN simultaneously and explicitly utilizes multiple types of strongly shifted data as negative samples in contrastive learning, effectively learning invariant representations that adapt to various pattern variations within the same activity. To enhance the ability to distinguish between known and new activities that share common features, CLAN incorporates both time and frequency domains, enabling the learning of multi-faceted discriminative representations. Additionally, we design an automatic selection mechanism of data augmentation methods tailored to each dataset's properties, generating appropriate positive and negative pairs for contrastive learning. Comprehensive experiments on real-world datasets show that CLAN achieves a 9.24% improvement in AUROC compared to the best-performing baseline model. </p> </div> </dd> <dt> <a name='item66'>[66]</a> <a href ="/abs/2407.06705" title="Abstract" id="2407.06705"> arXiv:2407.06705 </a> (replaced) [<a href="/pdf/2407.06705" title="Download PDF" id="pdf-2407.06705" aria-labelledby="pdf-2407.06705">pdf</a>, <a href="https://arxiv.org/html/2407.06705v2" title="View HTML" id="html-2407.06705" aria-labelledby="html-2407.06705" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2407.06705" title="Other formats" id="oth-2407.06705" aria-labelledby="oth-2407.06705">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Integrating Atmospheric Sensing and Communications for Resource Allocation in NTNs </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Leyva-Mayorga,+I">Israel Leyva-Mayorga</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Saggese,+F">Fabio Saggese</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Li,+L">Lintao Li</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Popovski,+P">Petar Popovski</a></div> <div class='list-comments mathjax'><span 
class='descriptor'>Comments:</span> Submitted for publication to IEEE Transactions on Wireless Communications </div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Networking and Internet Architecture (cs.NI)</span>; Signal Processing (eess.SP) </div> <p class='mathjax'> The integration of Non-Terrestrial Networks (NTNs) with Low Earth Orbit (LEO) satellite constellations into 5G and Beyond is essential to achieve truly global connectivity. A distinctive characteristic of LEO mega constellations is that they constitute a global infrastructure with predictable dynamics, which enables the pre-planned allocation of radio resources. However, the different bands that can be used for ground-to-satellite communication are affected differently by atmospheric conditions such as precipitation, which introduces uncertainty on the attenuation of the communication links at high frequencies. Based on this, we present a compelling case for applying integrated sensing and communications (ISAC) in heterogeneous and multi-layer LEO satellite constellations over wide areas. Specifically, we propose a sensing-assisted communications framework and frame structure that not only enables the accurate estimation of the atmospheric attenuation in the communication links through sensing but also leverages this information to determine the optimal serving satellites and allocate resources efficiently for downlink communication with users on the ground. The results show that, by dedicating an adequate amount of resources for sensing and solving the association and resource allocation problems jointly, it is feasible to increase the average throughput by 59% and the fairness by 700% when compared to solving these problems separately. 
</p> </div> </dd> <dt> <a name='item67'>[67]</a> <a href ="/abs/2407.07719" title="Abstract" id="2407.07719"> arXiv:2407.07719 </a> (replaced) [<a href="/pdf/2407.07719" title="Download PDF" id="pdf-2407.07719" aria-labelledby="pdf-2407.07719">pdf</a>, <a href="/format/2407.07719" title="Other formats" id="oth-2407.07719" aria-labelledby="oth-2407.07719">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Model-based learning for multi-antenna multi-frequency location-to-channel mapping </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Chatelier,+B">Baptiste Chatelier</a> (IETR, MERCE-France, INSA Rennes), <a href="https://arxiv.org/search/cs?searchtype=author&query=Corlay,+V">Vincent Corlay</a> (MERCE-France), <a href="https://arxiv.org/search/cs?searchtype=author&query=Crussi%C3%A8re,+M">Matthieu Crussière</a> (IETR, INSA Rennes), <a href="https://arxiv.org/search/cs?searchtype=author&query=Magoarou,+L+L">Luc Le Magoarou</a> (IETR, INSA Rennes)</div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Information Theory (cs.IT)</span>; Artificial Intelligence (cs.AI); Machine Learning (cs.LG); Signal Processing (eess.SP) </div> <p class='mathjax'> Years of study of the propagation channel showed a close relation between a location and the associated communication channel response. The use of a neural network to learn the location-to-channel mapping can therefore be envisioned. The Implicit Neural Representation (INR) literature showed that classical neural architecture are biased towards learning low-frequency content, making the location-to-channel mapping learning a non-trivial problem. Indeed, it is well known that this mapping is a function rapidly varying with the location, on the order of the wavelength. 
This paper leverages the model-based machine learning paradigm to derive a problem-specific neural architecture from a propagation channel model. The resulting architecture efficiently overcomes the spectral-bias issue. It only learns low-frequency sparse correction terms activating a dictionary of high-frequency components. The proposed architecture is evaluated against classical INR architectures on realistic synthetic data, showing much better accuracy. Its mapping learning performance is explained based on the approximated channel model, highlighting the explainability of the model-based machine learning paradigm. </p> </div> </dd> <dt> <a name='item68'>[68]</a> <a href ="/abs/2410.20081" title="Abstract" id="2410.20081"> arXiv:2410.20081 </a> (replaced) [<a href="/pdf/2410.20081" title="Download PDF" id="pdf-2410.20081" aria-labelledby="pdf-2410.20081">pdf</a>, <a href="https://arxiv.org/html/2410.20081v3" title="View HTML" id="html-2410.20081" aria-labelledby="html-2410.20081" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2410.20081" title="Other formats" id="oth-2410.20081" aria-labelledby="oth-2410.20081">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> emg2qwerty: A Large Dataset with Baselines for Touch Typing using Surface Electromyography </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Sivakumar,+V">Viswanath Sivakumar</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Seely,+J">Jeffrey Seely</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Du,+A">Alan Du</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Bittner,+S+R">Sean R Bittner</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Berenzweig,+A">Adam Berenzweig</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Bolarinwa,+A">Anuoluwapo Bolarinwa</a>, <a 
href="https://arxiv.org/search/cs?searchtype=author&query=Gramfort,+A">Alexandre Gramfort</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Mandel,+M+I">Michael I Mandel</a></div> <div class='list-comments mathjax'><span class='descriptor'>Comments:</span> Published at NeurIPS 2024 Datasets and Benchmarks Track </div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Machine Learning (cs.LG)</span>; Human-Computer Interaction (cs.HC); Audio and Speech Processing (eess.AS) </div> <p class='mathjax'> Surface electromyography (sEMG) non-invasively measures signals generated by muscle activity with sufficient sensitivity to detect individual spinal neurons and richness to identify dozens of gestures and their nuances. Wearable wrist-based sEMG sensors have the potential to offer low friction, subtle, information rich, always available human-computer inputs. To this end, we introduce emg2qwerty, a large-scale dataset of non-invasive electromyographic signals recorded at the wrists while touch typing on a QWERTY keyboard, together with ground-truth annotations and reproducible baselines. With 1,135 sessions spanning 108 users and 346 hours of recording, this is the largest such public dataset to date. These data demonstrate non-trivial, but well defined hierarchical relationships both in terms of the generative process, from neurons to muscles and muscle combinations, as well as in terms of domain shift across users and user sessions. Applying standard modeling techniques from the closely related field of Automatic Speech Recognition (ASR), we show strong baseline performance on predicting key-presses using sEMG signals alone. We believe the richness of this task and dataset will facilitate progress in several problems of interest to both the machine learning and neuroscientific communities. 
Dataset and code can be accessed at <a href="https://github.com/facebookresearch/emg2qwerty" rel="external noopener nofollow" class="link-external link-https">this https URL</a>. </p> </div> </dd> <dt> <a name='item69'>[69]</a> <a href ="/abs/2411.13922" title="Abstract" id="2411.13922"> arXiv:2411.13922 </a> (replaced) [<a href="/pdf/2411.13922" title="Download PDF" id="pdf-2411.13922" aria-labelledby="pdf-2411.13922">pdf</a>, <a href="https://arxiv.org/html/2411.13922v2" title="View HTML" id="html-2411.13922" aria-labelledby="html-2411.13922" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2411.13922" title="Other formats" id="oth-2411.13922" aria-labelledby="oth-2411.13922">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Exponentially Consistent Nonparametric Linkage-Based Clustering of Data Sequences </div> <div class='list-authors'><a href="https://arxiv.org/search/stat?searchtype=author&query=Singh,+B">Bhupender Singh</a>, <a href="https://arxiv.org/search/stat?searchtype=author&query=Rajagopalan,+A+R">Ananth Ram Rajagopalan</a>, <a href="https://arxiv.org/search/stat?searchtype=author&query=Bhashyam,+S">Srikrishna Bhashyam</a></div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Machine Learning (stat.ML)</span>; Information Theory (cs.IT); Machine Learning (cs.LG); Signal Processing (eess.SP) </div> <p class='mathjax'> In this paper, we consider nonparametric clustering of $M$ independent and identically distributed (i.i.d.) data sequences generated from <em>unknown</em> distributions. The distributions of the $M$ data sequences belong to $K$ underlying distribution clusters. 
Existing results on exponentially consistent nonparametric clustering algorithms, like single linkage-based (SLINK) clustering and $k$-medoids distribution clustering, assume that the maximum intra-cluster distance ($d_L$) is smaller than the minimum inter-cluster distance ($d_H$). First, in the fixed sample size (FSS) setting, we show that exponential consistency can be achieved for SLINK clustering under a less strict assumption, $d_I &lt; d_H$, where $d_I$ is the maximum distance between any two sub-clusters of a cluster that partition the cluster. Note that $d_I &lt; d_L$ in general. Thus, our results show that SLINK is exponentially consistent for a larger class of problems than previously known. In our simulations, we also identify examples where $k$-medoids clustering is unable to find the true clusters, but SLINK is exponentially consistent. Then, we propose a sequential clustering algorithm, named SLINK-SEQ, based on SLINK and prove that it is also exponentially consistent. Simulation results show that the SLINK-SEQ algorithm requires fewer expected number of samples than the FSS SLINK algorithm for the same probability of error. 
</p> </div> </dd> <dt> <a name='item70'>[70]</a> <a href ="/abs/2411.14501" title="Abstract" id="2411.14501"> arXiv:2411.14501 </a> (replaced) [<a href="/pdf/2411.14501" title="Download PDF" id="pdf-2411.14501" aria-labelledby="pdf-2411.14501">pdf</a>, <a href="https://arxiv.org/html/2411.14501v4" title="View HTML" id="html-2411.14501" aria-labelledby="html-2411.14501" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2411.14501" title="Other formats" id="oth-2411.14501" aria-labelledby="oth-2411.14501">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> U-Motion: Learned Point Cloud Video Compression with U-Structured Temporal Context Generation </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Fan,+T">Tingyu Fan</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Hu,+Y">Yueyu Hu</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Gong,+R">Ran Gong</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Wang,+Y">Yao Wang</a></div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Computer Vision and Pattern Recognition (cs.CV)</span>; Image and Video Processing (eess.IV) </div> <p class='mathjax'> Point cloud video (PCV) is a versatile 3D representation of dynamic scenes with emerging applications. This paper introduces U-Motion, a learning-based compression scheme for both PCV geometry and attributes. We propose a U-Structured inter-frame prediction framework, U-Inter, which performs explicit motion estimation and compensation (ME/MC) at different scales with varying levels of detail. It integrates Top-Down (Fine-to-Coarse) Motion Propagation, Bottom-Up Motion Predictive Coding and Multi-scale Group Motion Compensation to enable accurate motion estimation and efficient motion compression at each scale. 
In addition, we design a multi-scale spatial-temporal predictive coding module to capture the cross-scale spatial redundancy remaining after U-Inter prediction. We conduct experiments following the MPEG Common Test Condition for dense dynamic point clouds and demonstrate that U-Motion can achieve significant gains over MPEG G-PCC-GesTM v3.0 and recently published learning-based methods for both geometry and attribute compression. </p> </div> </dd> <dt> <a name='item71'>[71]</a> <a href ="/abs/2412.06662" title="Abstract" id="2412.06662"> arXiv:2412.06662 </a> (replaced) [<a href="/pdf/2412.06662" title="Download PDF" id="pdf-2412.06662" aria-labelledby="pdf-2412.06662">pdf</a>, <a href="https://arxiv.org/html/2412.06662v2" title="View HTML" id="html-2412.06662" aria-labelledby="html-2412.06662" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2412.06662" title="Other formats" id="oth-2412.06662" aria-labelledby="oth-2412.06662">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Stochastic LQR Design With Disturbance Preview </div> <div class='list-authors'><a href="https://arxiv.org/search/math?searchtype=author&query=Liu,+J">Jietian Liu</a>, <a href="https://arxiv.org/search/math?searchtype=author&query=Lessard,+L">Laurent Lessard</a>, <a href="https://arxiv.org/search/math?searchtype=author&query=Seiler,+P">Peter Seiler</a></div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Optimization and Control (math.OC)</span>; Systems and Control (eess.SY) </div> <p class='mathjax'> This paper considers the discrete-time, stochastic LQR problem with $p$ steps of disturbance preview information where $p$ is finite. We first derive the solution for this problem on a finite horizon with linear, time-varying dynamics and time-varying costs. 
Next, we derive the solution on the infinite horizon with linear, time-invariant dynamics and time-invariant costs. Our proofs rely on the well-known principle of optimality. We provide an independent proof for the principle of optimality that relies only on nested information structure. Finally, we show that the finite preview controller converges to the optimal noncausal controller as the preview horizon $p$ tends to infinity. We also provide a simple example to illustrate both the finite and infinite horizon results. </p> </div> </dd> <dt> <a name='item72'>[72]</a> <a href ="/abs/2503.05931" title="Abstract" id="2503.05931"> arXiv:2503.05931 </a> (replaced) [<a href="/pdf/2503.05931" title="Download PDF" id="pdf-2503.05931" aria-labelledby="pdf-2503.05931">pdf</a>, <a href="https://arxiv.org/html/2503.05931v2" title="View HTML" id="html-2503.05931" aria-labelledby="html-2503.05931" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2503.05931" title="Other formats" id="oth-2503.05931" aria-labelledby="oth-2503.05931">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Training and Inference Efficiency of Encoder-Decoder Speech Models </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=%C5%BBelasko,+P">Piotr Żelasko</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Dhawan,+K">Kunal Dhawan</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Galvez,+D">Daniel Galvez</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Puvvada,+K+C">Krishna C. 
Puvvada</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Pasad,+A">Ankita Pasad</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Koluguri,+N+R">Nithin Rao Koluguri</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Hu,+K">Ke Hu</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Lavrukhin,+V">Vitaly Lavrukhin</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Balam,+J">Jagadeesh Balam</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Ginsburg,+B">Boris Ginsburg</a></div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Computation and Language (cs.CL)</span>; Audio and Speech Processing (eess.AS) </div> <p class='mathjax'> Attention encoder-decoder model architecture is the backbone of several recent top performing foundation speech models: Whisper, Seamless, OWSM, and Canary-1B. However, the reported data and compute requirements for their training are prohibitive for many in the research community. In this work, we focus on the efficiency angle and ask the questions of whether we are training these speech models efficiently, and what can we do to improve? We argue that a major, if not the most severe, detrimental factor for training efficiency is related to the sampling strategy of sequential data. We show that negligence in mini-batch sampling leads to more than 50% computation being spent on padding. To that end, we study, profile, and optimize Canary-1B training to show gradual improvement in GPU utilization leading up to 5x increase in average batch sizes versus its original training settings. This in turn allows us to train an equivalent model using 4x less GPUs in the same wall time, or leverage the original resources and train it in 2x shorter wall time. Finally, we observe that the major inference bottleneck lies in the autoregressive decoder steps. 
We find that adjusting the model architecture to transfer model parameters from the decoder to the encoder results in a 3x inference speedup as measured by inverse real-time factor (RTFx) while preserving the accuracy and compute requirements for convergence. The training code and models will be available as open-source. </p> </div> </dd> <dt> <a name='item73'>[73]</a> <a href ="/abs/2503.07667" title="Abstract" id="2503.07667"> arXiv:2503.07667 </a> (replaced) [<a href="/pdf/2503.07667" title="Download PDF" id="pdf-2503.07667" aria-labelledby="pdf-2503.07667">pdf</a>, <a href="https://arxiv.org/html/2503.07667v2" title="View HTML" id="html-2503.07667" aria-labelledby="html-2503.07667" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2503.07667" title="Other formats" id="oth-2503.07667" aria-labelledby="oth-2503.07667">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> CLIMB: Data Foundations for Large Scale Multimodal Clinical Foundation Models </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Dai,+W">Wei Dai</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Chen,+P">Peilin Chen</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Lu,+M">Malinda Lu</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Li,+D">Daniel Li</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Wei,+H">Haowen Wei</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Cui,+H">Hejie Cui</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Liang,+P+P">Paul Pu Liang</a></div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Machine Learning (cs.LG)</span>; Artificial Intelligence (cs.AI); Computer Vision and Pattern Recognition (cs.CV); Signal Processing (eess.SP) </div> <p class='mathjax'> Recent advances in clinical AI have enabled remarkable 
progress across many clinical domains. However, existing benchmarks and models are primarily limited to a small set of modalities and tasks, which hinders the development of large-scale multimodal methods that can make holistic assessments of patient health and well-being. To bridge this gap, we introduce Clinical Large-Scale Integrative Multimodal Benchmark (CLIMB), a comprehensive clinical benchmark unifying diverse clinical data across imaging, language, temporal, and graph modalities. CLIMB comprises 4.51 million patient samples totaling 19.01 terabytes distributed across 2D imaging, 3D video, time series, graphs, and multimodal data. Through extensive empirical evaluation, we demonstrate that multitask pretraining significantly improves performance on understudied domains, achieving up to 29% improvement in ultrasound and 23% in ECG analysis over single-task learning. Pretraining on CLIMB also effectively improves models' generalization capability to new tasks, and strong unimodal encoder performance translates well to multimodal performance when paired with task-appropriate fusion strategies. Our findings provide a foundation for new architecture designs and pretraining strategies to advance clinical AI research. Code is released at <a href="https://github.com/DDVD233/climb" rel="external noopener nofollow" class="link-external link-https">this https URL</a>. 
</p> </div> </dd> <dt> <a name='item74'>[74]</a> <a href ="/abs/2503.12461" title="Abstract" id="2503.12461"> arXiv:2503.12461 </a> (replaced) [<a href="/pdf/2503.12461" title="Download PDF" id="pdf-2503.12461" aria-labelledby="pdf-2503.12461">pdf</a>, <a href="https://arxiv.org/html/2503.12461v2" title="View HTML" id="html-2503.12461" aria-labelledby="html-2503.12461" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2503.12461" title="Other formats" id="oth-2503.12461" aria-labelledby="oth-2503.12461">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> MambaIC: State Space Models for High-Performance Learned Image Compression </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Zeng,+F">Fanhu Zeng</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Tang,+H">Hao Tang</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Shao,+Y">Yihua Shao</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Chen,+S">Siyu Chen</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Shao,+L">Ling Shao</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Wang,+Y">Yan Wang</a></div> <div class='list-comments mathjax'><span class='descriptor'>Comments:</span> Accepted to CVPR 2025 </div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Computer Vision and Pattern Recognition (cs.CV)</span>; Image and Video Processing (eess.IV) </div> <p class='mathjax'> A high-performance image compression algorithm is crucial for real-time information transmission across numerous fields. Despite rapid progress in image compression, computational inefficiency and poor redundancy modeling still pose significant bottlenecks, limiting practical applications. 
Inspired by the effectiveness of state space models (SSMs) in capturing long-range dependencies, we leverage SSMs to address computational inefficiency in existing methods and improve image compression from multiple perspectives. In this paper, we integrate the advantages of SSMs for better efficiency-performance trade-off and propose an enhanced image compression approach through refined context modeling, which we term MambaIC. Specifically, we explore context modeling to adaptively refine the representation of hidden states. Additionally, we introduce window-based local attention into channel-spatial entropy modeling to reduce potential spatial redundancy during compression, thereby increasing efficiency. Comprehensive qualitative and quantitative results validate the effectiveness and efficiency of our approach, particularly for high-resolution image compression. Code is released at <a href="https://github.com/AuroraZengfh/MambaIC" rel="external noopener nofollow" class="link-external link-https">this https URL</a>. 
</p> </div> </dd> <dt> <a name='item75'>[75]</a> <a href ="/abs/2503.13801" title="Abstract" id="2503.13801"> arXiv:2503.13801 </a> (replaced) [<a href="/pdf/2503.13801" title="Download PDF" id="pdf-2503.13801" aria-labelledby="pdf-2503.13801">pdf</a>, <a href="https://arxiv.org/html/2503.13801v2" title="View HTML" id="html-2503.13801" aria-labelledby="html-2503.13801" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2503.13801" title="Other formats" id="oth-2503.13801" aria-labelledby="oth-2503.13801">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> SCAN-BEST: Efficient Sub-6GHz-Aided Near-field Beam Selection with Formal Reliability Guarantees </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Deng,+W">Weicao Deng</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Shi,+B">Binpu Shi</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Li,+M">Min Li</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Simeone,+O">Osvaldo Simeone</a></div> <div class='list-comments mathjax'><span class='descriptor'>Comments:</span> 13 pages, 11 figures </div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Information Theory (cs.IT)</span>; Signal Processing (eess.SP) </div> <p class='mathjax'> As millimeter-wave (mmWave) multiple-input multiple-output (MIMO) systems continue to incorporate larger antenna arrays, the range of near-field propagation expands, making it more likely for users close to the transmitter to fall within the near-field regime. Traditional far-field beam training methods are no longer effective in this context. Additionally, near-field beam training presents challenges, since the training codebook must account for both angular and distance dimensions, leading to large codebook sizes. 
To reduce the in-band training overhead, we propose the Sub-6G Channel-Aided Near-field BEam SelecTion (SCAN-BEST) framework, which is motivated by the spatial-temporal congruence between sub-6 GHz (sub-6G) and mmWave channels. SCAN-BEST utilizes preprocessed sub-6G channel estimates as input, and employs a convolutional neural network (CNN) to predict the probability of each beam being optimal within the near-field beam training codebook. Given the prediction uncertainty arising from the variance between sub-6G and mmWave channels, we introduce a conformal risk control (CRC)-based module that generates a set of beam candidates for further limited in-band training, enabling the final beam selection to formally meet a user-defined target coverage rate. Numerical results confirm the theoretical properties of SCAN-BEST in terms of the achieved coverage rate of the beam candidates and various metrics. Moreover, SCAN-BEST enjoys good scalability and robustness to various sub-6G system configurations, including the sizes of calibration datasets. 
</p> </div> </dd> </dl> <div class='paging'>Total of 75 entries </div> <div class='morefewer'>Showing up to 2000 entries per page: <a href="/list/eess/new?skip=0&amp;show=1000" rel="nofollow"> fewer</a> | <span style="color: #454545">more</span> | <span style="color: #454545">all</span> </div> </div> </div> </div> </main> <footer style="clear: both;"> <div class="columns is-desktop" role="navigation" aria-label="Secondary" style="margin: -0.75em -0.75em 0.75em -0.75em">  <div class="column" style="padding: 0;"> <div class="columns"> <div class="column"> <ul style="list-style: none; line-height: 2;"> <li><a href="https://info.arxiv.org/about">About</a></li> <li><a href="https://info.arxiv.org/help">Help</a></li> </ul> </div> <div class="column"> <ul style="list-style: none; line-height: 2;"> <li> <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 512 512" class="icon filter-black" role="presentation"><title>contact arXiv</title><desc>Click here to contact arXiv</desc><path d="M502.3 190.8c3.9-3.1 9.7-.2 9.7 4.7V400c0 26.5-21.5 48-48 48H48c-26.5 0-48-21.5-48-48V195.6c0-5 5.7-7.8 9.7-4.7 22.4 17.4 52.1 39.5 154.1 113.6 21.1 15.4 56.7 47.8 92.2 47.6 35.7.3 72-32.8 92.3-47.6 102-74.1 131.6-96.3 154-113.7zM256 320c23.2.4 56.6-29.2 73.4-41.4 132.7-96.3 142.8-104.7 173.4-128.7 5.8-4.5 9.2-11.5 9.2-18.9v-19c0-26.5-21.5-48-48-48H48C21.5 64 0 85.5 0 112v19c0 7.4 3.4 14.3 9.2 18.9 30.6 23.9 40.7 32.4 173.4 128.7 16.8 12.2 50.2 41.8 73.4 41.4z"/></svg> <a href="https://info.arxiv.org/help/contact.html"> Contact</a> </li> <li> <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 512 512" class="icon filter-black" role="presentation"><title>subscribe to arXiv mailings</title><desc>Click here to subscribe</desc><path d="M476 3.2L12.5 270.6c-18.1 10.4-15.8 35.6 2.2 43.2L121 358.4l287.3-253.2c5.5-4.9 13.3 2.6 8.6 8.3L176 407v80.5c0 23.6 28.5 32.9 42.5 15.8L282 426l124.6 52.2c14.2 6 30.4-2.9 33-18.2l72-432C515 7.8 493.3-6.8 476 3.2z"/></svg> <a 
href="https://info.arxiv.org/help/subscribe"> Subscribe</a> </li> </ul> </div> </div> </div>   <div class="column" style="padding: 0;"> <div class="columns"> <div class="column"> <ul style="list-style: none; line-height: 2;"> <li><a href="https://info.arxiv.org/help/license/index.html">Copyright</a></li> <li><a href="https://info.arxiv.org/help/policies/privacy_policy.html">Privacy Policy</a></li> </ul> </div> <div class="column sorry-app-links"> <ul style="list-style: none; line-height: 2;"> <li><a href="https://info.arxiv.org/help/web_accessibility.html">Web Accessibility Assistance</a></li> <li> <p class="help"> <a class="a11y-main-link" href="https://status.arxiv.org" target="_blank">arXiv Operational Status <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 256 512" class="icon filter-dark_grey" role="presentation"><path d="M224.3 273l-136 136c-9.4 9.4-24.6 9.4-33.9 0l-22.6-22.6c-9.4-9.4-9.4-24.6 0-33.9l96.4-96.4-96.4-96.4c-9.4-9.4-9.4-24.6 0-33.9L54.3 103c9.4-9.4 24.6-9.4 33.9 0l136 136c9.5 9.4 9.5 24.6.1 34z"/></svg></a><br> Get status notifications via <a class="is-link" href="https://subscribe.sorryapp.com/24846f03/email/new" target="_blank"><svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 512 512" class="icon filter-black" role="presentation"><path d="M502.3 190.8c3.9-3.1 9.7-.2 9.7 4.7V400c0 26.5-21.5 48-48 48H48c-26.5 0-48-21.5-48-48V195.6c0-5 5.7-7.8 9.7-4.7 22.4 17.4 52.1 39.5 154.1 113.6 21.1 15.4 56.7 47.8 92.2 47.6 35.7.3 72-32.8 92.3-47.6 102-74.1 131.6-96.3 154-113.7zM256 320c23.2.4 56.6-29.2 73.4-41.4 132.7-96.3 142.8-104.7 173.4-128.7 5.8-4.5 9.2-11.5 9.2-18.9v-19c0-26.5-21.5-48-48-48H48C21.5 64 0 85.5 0 112v19c0 7.4 3.4 14.3 9.2 18.9 30.6 23.9 40.7 32.4 173.4 128.7 16.8 12.2 50.2 41.8 73.4 41.4z"/></svg>email</a> or <a class="is-link" href="https://subscribe.sorryapp.com/24846f03/slack/new" target="_blank"><svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 448 512" class="icon filter-black" role="presentation"><path d="M94.12 315.1c0 
25.9-21.16 47.06-47.06 47.06S0 341 0 315.1c0-25.9 21.16-47.06 47.06-47.06h47.06v47.06zm23.72 0c0-25.9 21.16-47.06 47.06-47.06s47.06 21.16 47.06 47.06v117.84c0 25.9-21.16 47.06-47.06 47.06s-47.06-21.16-47.06-47.06V315.1zm47.06-188.98c-25.9 0-47.06-21.16-47.06-47.06S139 32 164.9 32s47.06 21.16 47.06 47.06v47.06H164.9zm0 23.72c25.9 0 47.06 21.16 47.06 47.06s-21.16 47.06-47.06 47.06H47.06C21.16 243.96 0 222.8 0 196.9s21.16-47.06 47.06-47.06H164.9zm188.98 47.06c0-25.9 21.16-47.06 47.06-47.06 25.9 0 47.06 21.16 47.06 47.06s-21.16 47.06-47.06 47.06h-47.06V196.9zm-23.72 0c0 25.9-21.16 47.06-47.06 47.06-25.9 0-47.06-21.16-47.06-47.06V79.06c0-25.9 21.16-47.06 47.06-47.06 25.9 0 47.06 21.16 47.06 47.06V196.9zM283.1 385.88c25.9 0 47.06 21.16 47.06 47.06 0 25.9-21.16 47.06-47.06 47.06-25.9 0-47.06-21.16-47.06-47.06v-47.06h47.06zm0-23.72c-25.9 0-47.06-21.16-47.06-47.06 0-25.9 21.16-47.06 47.06-47.06h117.84c25.9 0 47.06 21.16 47.06 47.06 0 25.9-21.16 47.06-47.06 47.06H283.1z"/></svg>slack</a> </p> </li> </ul> </div> </div> </div>   </div> </footer> </div> <script src="/static/base/1.0.1/js/member_acknowledgement.js"></script> </body> </html>