Search | arXiv e-print repository

<a href="https://info.arxiv.org/about/donate.html">Donate</a></span></p></div> </div> <!-- contains arXiv identity and search bar --> <div class="identity level is-marginless"> <div class="level-left"> <div class="level-item"> <a class="arxiv" href="https://arxiv.org/" aria-label="arxiv-logo"> <img src="https://static.arxiv.org/static/base/1.0.0a5/images/arxiv-logo-one-color-white.svg" aria-label="logo" alt="arxiv logo" width="85" style="width:85px;"/> </a> </div> </div> <div class="search-block level-right"> <form class="level-item mini-search" method="GET" action="https://arxiv.org/search"> <div class="field has-addons"> <div class="control"> <input class="input is-small" type="text" name="query" placeholder="Search..." aria-label="Search term or terms" /> <p class="help"><a href="https://info.arxiv.org/help">Help</a> | <a href="https://arxiv.org/search/advanced">Advanced Search</a></p> </div> <div class="control"> <div class="select is-small"> <select name="searchtype" aria-label="Field to search"> <option value="all" selected="selected">All fields</option> <option value="title">Title</option> <option value="author">Author</option> <option value="abstract">Abstract</option> <option value="comments">Comments</option> <option value="journal_ref">Journal reference</option> <option value="acm_class">ACM classification</option> <option value="msc_class">MSC classification</option> <option value="report_num">Report number</option> <option value="paper_id">arXiv identifier</option> <option value="doi">DOI</option> <option value="orcid">ORCID</option> <option value="author_id">arXiv author ID</option> <option value="help">Help pages</option> <option value="full_text">Full text</option> </select> </div> </div> <input type="hidden" name="source" value="header"> <button class="button is-small is-cul-darker">Search</button> </div> </form> </div> </div> <!-- closes identity --> <div class="container"> <div class="user-tools is-size-7 has-text-right has-text-weight-bold" role="navigation" aria-label="User menu"> <a href="https://arxiv.org/login">Login</a> </div> </div> </header> <main class="container" id="main-container"> <div class="level is-marginless"> <div class="level-left"> <h1 class="title is-clearfix"> Showing 1&ndash;50 of 160 results for author: <span class="mathjax">Liu, B</span> </h1> </div> <div class="level-right is-hidden-mobile"> <!-- feedback for mobile is moved to footer --> <span class="help" style="display: inline-block;"><a href="https://github.com/arXiv/arxiv-search/releases">Search v0.5.6 released 2020-02-24</a>&nbsp;&nbsp;</span> </div> </div> <div class="content"> <form method="GET" action="/search/eess" aria-role="search"> Searching in archive <strong>eess</strong>. <a href="/search/?searchtype=author&amp;query=Liu%2C+B">Search in all archives.</a> <div class="field has-addons-tablet"> <div class="control is-expanded"> <label for="query" class="hidden-label">Search term or terms</label> <input class="input is-medium" id="query" name="query" placeholder="Search term..." 
type="text" value="Liu, B"> </div> <div class="select control is-medium"> <label class="is-hidden" for="searchtype">Field</label> <select class="is-medium" id="searchtype" name="searchtype"><option value="all">All fields</option><option value="title">Title</option><option selected value="author">Author(s)</option><option value="abstract">Abstract</option><option value="comments">Comments</option><option value="journal_ref">Journal reference</option><option value="acm_class">ACM classification</option><option value="msc_class">MSC classification</option><option value="report_num">Report number</option><option value="paper_id">arXiv identifier</option><option value="doi">DOI</option><option value="orcid">ORCID</option><option value="license">License (URI)</option><option value="author_id">arXiv author ID</option><option value="help">Help pages</option><option value="full_text">Full text</option></select> </div> <div class="control"> <button class="button is-link is-medium">Search</button> </div> </div> <div class="field"> <div class="control is-size-7"> <label class="radio"> <input checked id="abstracts-0" name="abstracts" type="radio" value="show"> Show abstracts </label> <label class="radio"> <input id="abstracts-1" name="abstracts" type="radio" value="hide"> Hide abstracts </label> </div> </div> <div class="is-clearfix" style="height: 2.5em"> <div class="is-pulled-right"> <a href="/search/advanced?terms-0-term=Liu%2C+B&amp;terms-0-field=author&amp;size=50&amp;order=-announced_date_first">Advanced Search</a> </div> </div> <input type="hidden" name="order" value="-announced_date_first"> <input type="hidden" name="size" value="50"> </form> <div class="level breathe-horizontal"> <div class="level-left"> <form method="GET" action="/search/"> <div style="display: none;"> <select id="searchtype" name="searchtype"><option value="all">All fields</option><option value="title">Title</option><option selected value="author">Author(s)</option><option value="abstract">Abstract</option><option value="comments">Comments</option><option value="journal_ref">Journal reference</option><option value="acm_class">ACM classification</option><option value="msc_class">MSC classification</option><option value="report_num">Report number</option><option value="paper_id">arXiv identifier</option><option value="doi">DOI</option><option value="orcid">ORCID</option><option value="license">License (URI)</option><option value="author_id">arXiv author ID</option><option value="help">Help pages</option><option value="full_text">Full text</option></select> <input id="query" name="query" type="text" value="Liu, B"> <ul id="abstracts"><li><input checked id="abstracts-0" name="abstracts" type="radio" value="show"> <label for="abstracts-0">Show abstracts</label></li><li><input id="abstracts-1" name="abstracts" type="radio" value="hide"> <label for="abstracts-1">Hide abstracts</label></li></ul> </div> <div class="box field is-grouped is-grouped-multiline level-item"> <div class="control"> <span class="select is-small"> <select id="size" name="size"><option value="25">25</option><option selected value="50">50</option><option value="100">100</option><option value="200">200</option></select> </span> <label for="size">results per page</label>. 
</div> <div class="control"> <label for="order">Sort results by</label> <span class="select is-small"> <select id="order" name="order"><option selected value="-announced_date_first">Announcement date (newest first)</option><option value="announced_date_first">Announcement date (oldest first)</option><option value="-submitted_date">Submission date (newest first)</option><option value="submitted_date">Submission date (oldest first)</option><option value="">Relevance</option></select> </span> </div> <div class="control"> <button class="button is-small is-link">Go</button> </div> </div> </form> </div> </div> <nav class="pagination is-small is-centered breathe-horizontal" role="navigation" aria-label="pagination"> <a href="" class="pagination-previous is-invisible">Previous </a> <a href="/search/?searchtype=author&amp;query=Liu%2C+B&amp;start=50" class="pagination-next" >Next </a> <ul class="pagination-list"> <li> <a href="/search/?searchtype=author&amp;query=Liu%2C+B&amp;start=0" class="pagination-link is-current" aria-label="Goto page 1">1 </a> </li> <li> <a href="/search/?searchtype=author&amp;query=Liu%2C+B&amp;start=50" class="pagination-link " aria-label="Page 2" aria-current="page">2 </a> </li> <li> <a href="/search/?searchtype=author&amp;query=Liu%2C+B&amp;start=100" class="pagination-link " aria-label="Page 3" aria-current="page">3 </a> </li> <li> <a href="/search/?searchtype=author&amp;query=Liu%2C+B&amp;start=150" class="pagination-link " aria-label="Page 4" aria-current="page">4 </a> </li> </ul> </nav> <ol class="breathe-horizontal" start="1"> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2411.10004">arXiv:2411.10004</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2411.10004">pdf</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Image and Video Processing">eess.IV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> EyeDiff: text-to-image diffusion model improves rare eye disease diagnosis </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&amp;query=Chen%2C+R">Ruoyu Chen</a>, <a href="/search/eess?searchtype=author&amp;query=Zhang%2C+W">Weiyi Zhang</a>, <a href="/search/eess?searchtype=author&amp;query=Liu%2C+B">Bowen Liu</a>, <a href="/search/eess?searchtype=author&amp;query=Chen%2C+X">Xiaolan Chen</a>, <a href="/search/eess?searchtype=author&amp;query=Xu%2C+P">Pusheng Xu</a>, <a href="/search/eess?searchtype=author&amp;query=Liu%2C+S">Shunming Liu</a>, <a href="/search/eess?searchtype=author&amp;query=He%2C+M">Mingguang He</a>, <a href="/search/eess?searchtype=author&amp;query=Shi%2C+D">Danli Shi</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2411.10004v1-abstract-short" style="display: inline;"> The rising prevalence of vision-threatening retinal diseases poses a significant burden on the global healthcare systems. Deep learning (DL) offers a promising solution for automatic disease screening but demands substantial data. 
Collecting and labeling large volumes of ophthalmic images across various modalities encounters several real-world challenges, especially for rare diseases. Here, we int&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.10004v1-abstract-full').style.display = 'inline'; document.getElementById('2411.10004v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2411.10004v1-abstract-full" style="display: none;"> The rising prevalence of vision-threatening retinal diseases poses a significant burden on the global healthcare systems. Deep learning (DL) offers a promising solution for automatic disease screening but demands substantial data. Collecting and labeling large volumes of ophthalmic images across various modalities encounters several real-world challenges, especially for rare diseases. Here, we introduce EyeDiff, a text-to-image model designed to generate multimodal ophthalmic images from natural language prompts and evaluate its applicability in diagnosing common and rare diseases. EyeDiff is trained on eight large-scale datasets using the advanced latent diffusion model, covering 14 ophthalmic image modalities and over 80 ocular diseases, and is adapted to ten multi-country external datasets. The generated images accurately capture essential lesional characteristics, achieving high alignment with text prompts as evaluated by objective metrics and human experts. Furthermore, integrating generated images significantly enhances the accuracy of detecting minority classes and rare eye diseases, surpassing traditional oversampling methods in addressing data imbalance. EyeDiff effectively tackles the issue of data imbalance and insufficiency typically encountered in rare diseases and addresses the challenges of collecting large-scale annotated images, offering a transformative solution to enhance the development of expert-level diseases diagnosis models in ophthalmic field. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.10004v1-abstract-full').style.display = 'none'; document.getElementById('2411.10004v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 15 November, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> November 2024. 
</p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">28 pages, 2 figures</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2411.09868">arXiv:2411.09868</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2411.09868">pdf</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Information Theory">cs.IT</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Signal Processing">eess.SP</span> </div> </div> <p class="title is-5 mathjax"> Phase Transitions with Structured Sparsity </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&amp;query=Zhang%2C+H">Huiguang Zhang</a>, <a href="/search/eess?searchtype=author&amp;query=Liu%2C+B">Baoguo Liu</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2411.09868v1-abstract-short" style="display: inline;"> In the field of signal processing, phase transition phenomena have recently attracted great attention. Donoho&#39;s work established the signal recovery threshold using indicators such as restricted isotropy (RIP) and incoherence and proved that phase transition phenomena occur in compressed sampling. Nevertheless, the phase transition phenomenon of structured sparse signals remains unclear, and these&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.09868v1-abstract-full').style.display = 'inline'; document.getElementById('2411.09868v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2411.09868v1-abstract-full" style="display: none;"> In the field of signal processing, phase transition phenomena have recently attracted great attention. Donoho&#39;s work established the signal recovery threshold using indicators such as restricted isotropy (RIP) and incoherence and proved that phase transition phenomena occur in compressed sampling. Nevertheless, the phase transition phenomenon of structured sparse signals remains unclear, and these studies mainly focused on simple sparse signals. Signals with a specific structure, such as the block or tree structures common in real-world applications, are called structured sparse signals. The objectives of this article are to study the phase transition phenomenon of structured sparse signals and to investigate how structured sparse signals affect the phase transition threshold. It begins with a summary of the common subspace of structured sparse signals and the theory of high-dimensional convex polytope random projections. Next, the strong threshold expression of block-structured and tree-structured sparse signals is derived after examining the weak and strong thresholds of structured sparse signals. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.09868v1-abstract-full').style.display = 'none'; document.getElementById('2411.09868v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 14 November, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> November 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2411.08529">arXiv:2411.08529</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2411.08529">pdf</a>, <a href="https://arxiv.org/ps/2411.08529">ps</a>, <a href="https://arxiv.org/format/2411.08529">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Signal Processing">eess.SP</span> </div> </div> <p class="title is-5 mathjax"> Towards Practical Deep Schedulers for Allocating Cellular Radio Resources </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&amp;query=Kela%2C+P">Petteri Kela</a>, <a href="/search/eess?searchtype=author&amp;query=Liu%2C+B">Bryan Liu</a>, <a href="/search/eess?searchtype=author&amp;query=Valcarce%2C+A">Alvaro Valcarce</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2411.08529v1-abstract-short" style="display: inline;"> Machine learning methods are often suggested to address wireless network functions, such as radio packet scheduling. However, a feasible 3GPP-compliant scheduler capable of delivering fair throughput across users, while keeping a low computational complexity for 5G and beyond is still missing. To address this, we first take a critical look at previous deep scheduler efforts. Secondly, we enhance S&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.08529v1-abstract-full').style.display = 'inline'; document.getElementById('2411.08529v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2411.08529v1-abstract-full" style="display: none;"> Machine learning methods are often suggested to address wireless network functions, such as radio packet scheduling. However, a feasible 3GPP-compliant scheduler capable of delivering fair throughput across users, while keeping a low computational complexity for 5G and beyond is still missing. To address this, we first take a critical look at previous deep scheduler efforts. Secondly, we enhance State-of-the-Art (SoTA) deep Reinforcement Learning (RL) algorithms and adapt them to train our deep scheduler. In particular, we propose novel training techniques for Proximal Policy Optimization (PPO) and a new Distributional Soft Actor-Critic Discrete (DSACD) algorithm, which outperformed other tested variants. These improvements were achieved while maintaining minimal actor network complexity, making them suitable for real-time computing environments. Additionally, the entropy learning in SACD was fine-tuned to accommodate resource allocation action spaces of varying sizes. Our proposed deep schedulers exhibited strong generalization across different bandwidths, number of MU-MIMO layers, and traffic models. 
Ultimately, we show that our pre-trained deep schedulers outperform their heuristic rivals in realistic and standard-compliant 5G system-level simulations. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.08529v1-abstract-full').style.display = 'none'; document.getElementById('2411.08529v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 13 November, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> November 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2411.06184">arXiv:2411.06184</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2411.06184">pdf</a>, <a href="https://arxiv.org/format/2411.06184">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Image and Video Processing">eess.IV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">stat.ML</span> </div> </div> <p class="title is-5 mathjax"> Alleviating Hyperparameter-Tuning Burden in SVM Classifiers for Pulmonary Nodules Diagnosis with Multi-Task Bayesian Optimization </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&amp;query=Chi%2C+W">Wenhao Chi</a>, <a href="/search/eess?searchtype=author&amp;query=Liu%2C+H">Haiping Liu</a>, <a href="/search/eess?searchtype=author&amp;query=Dong%2C+H">Hongqiao Dong</a>, <a href="/search/eess?searchtype=author&amp;query=Liang%2C+W">Wenhua Liang</a>, <a href="/search/eess?searchtype=author&amp;query=Liu%2C+B">Bo Liu</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2411.06184v1-abstract-short" style="display: inline;"> In the field of non-invasive medical imaging, radiomic features are utilized to measure tumor characteristics. However, these features can be affected by the techniques used to discretize the images, ultimately impacting the accuracy of diagnosis. To investigate the influence of various image discretization methods on diagnosis, it is common practice to evaluate multiple discretization strategies&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.06184v1-abstract-full').style.display = 'inline'; document.getElementById('2411.06184v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2411.06184v1-abstract-full" style="display: none;"> In the field of non-invasive medical imaging, radiomic features are utilized to measure tumor characteristics. However, these features can be affected by the techniques used to discretize the images, ultimately impacting the accuracy of diagnosis. To investigate the influence of various image discretization methods on diagnosis, it is common practice to evaluate multiple discretization strategies individually. 
This approach often leads to redundant and time-consuming tasks such as training predictive models and fine-tuning hyperparameters separately. This study examines the feasibility of employing multi-task Bayesian optimization to accelerate the hyperparameters search for classifying benign and malignant pulmonary nodules using RBF SVM. Our findings suggest that multi-task Bayesian optimization significantly accelerates the search for hyperparameters in comparison to a single-task approach. To the best of our knowledge, this is the first investigation to utilize multi-task Bayesian optimization in a critical medical context. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.06184v1-abstract-full').style.display = 'none'; document.getElementById('2411.06184v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 9 November, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> November 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">12 pages, 4 figures, 37 references</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2411.03633">arXiv:2411.03633</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2411.03633">pdf</a>, <a href="https://arxiv.org/format/2411.03633">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Systems and Control">eess.SY</span> </div> </div> <p class="title is-5 mathjax"> Privacy-Preserving Resilient Vector Consensus </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&amp;query=Liu%2C+B">Bing Liu</a>, <a href="/search/eess?searchtype=author&amp;query=Zhao%2C+C">Chengcheng Zhao</a>, <a href="/search/eess?searchtype=author&amp;query=Chai%2C+L">Li Chai</a>, <a href="/search/eess?searchtype=author&amp;query=Cheng%2C+P">Peng Cheng</a>, <a href="/search/eess?searchtype=author&amp;query=Chen%2C+J">Jiming Chen</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2411.03633v1-abstract-short" style="display: inline;"> This paper studies privacy-preserving resilient vector consensus in multi-agent systems against faulty agents, where normal agents can achieve consensus within the convex hull of their initial states while protecting state vectors from being disclosed. Specifically, we consider a modification of an existing algorithm known as Approximate Distributed Robust Convergence Using Centerpoints (ADRC), i.&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.03633v1-abstract-full').style.display = 'inline'; document.getElementById('2411.03633v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2411.03633v1-abstract-full" style="display: none;"> This paper studies privacy-preserving resilient vector consensus in multi-agent systems against faulty agents, where normal agents can achieve consensus within the convex hull of their initial states while protecting state vectors from being disclosed. 
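   The hyperparameter-tuning entry above couples several image-discretization settings through multi-task Bayesian optimization of an RBF SVM. As a much-simplified, single-task sketch of the underlying idea (model-based search over C and gamma), the snippet below uses Optuna's default TPE sampler on a stand-in dataset; it does not reproduce the paper's multi-task formulation or its radiomics features.

```python
# Simplified single-task Bayesian-style hyperparameter search for an RBF SVM.
# The paper's *multi-task* Bayesian optimization across discretization settings
# is not reproduced here; dataset and search ranges are illustrative.
import optuna
from sklearn.datasets import load_breast_cancer  # stand-in binary classification data
from sklearn.model_selection import cross_val_score
from sklearn.svm import SVC

X, y = load_breast_cancer(return_X_y=True)

def objective(trial):
    C = trial.suggest_float("C", 1e-2, 1e3, log=True)
    gamma = trial.suggest_float("gamma", 1e-4, 1e1, log=True)
    return cross_val_score(SVC(kernel="rbf", C=C, gamma=gamma), X, y,
                           cv=5, scoring="roc_auc").mean()

study = optuna.create_study(direction="maximize")  # TPE sampler by default
study.optimize(objective, n_trials=30)
print(study.best_params, study.best_value)
```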
5. arXiv:2411.03633 [pdf, other] (eess.SY)
   Privacy-Preserving Resilient Vector Consensus
   Authors: Bing Liu, Chengcheng Zhao, Li Chai, Peng Cheng, Jiming Chen
   Abstract: This paper studies privacy-preserving resilient vector consensus in multi-agent systems against faulty agents, where normal agents can achieve consensus within the convex hull of their initial states while protecting their state vectors from being disclosed. Specifically, we consider a modification of an existing algorithm known as Approximate Distributed Robust Convergence Using Centerpoints (ADRC), i.e., Privacy-Preserving ADRC (PP-ADRC). Under PP-ADRC, each normal agent introduces multivariate Gaussian noise to its state during each iteration. We first provide sufficient conditions to ensure that all normal agents' states achieve mean-square convergence under PP-ADRC. We then analyze convergence accuracy from two perspectives: the Mahalanobis distance of the final value from its expectation, and the Hausdorff-distance-based alteration of the convex hull caused by noise when only some dimensions are perturbed. Next, we employ concentrated geo-privacy to characterize privacy preservation and conduct a thorough comparison with differential privacy. Finally, numerical simulations demonstrate the theoretical results.
   Submitted 5 November, 2024; originally announced November 2024.
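   The privacy mechanism named in the PP-ADRC entry above, each agent perturbing its state with multivariate Gaussian noise before sharing it at every iteration, can be illustrated with a toy consensus loop. The dynamics below are plain averaging under an assumed mixing weight, not the centerpoint-based ADRC update the paper analyzes.

```python
# Toy illustration of privacy via additive multivariate Gaussian noise in a
# consensus iteration (simple averaging dynamics assumed; not the paper's
# centerpoint-based resilient update).
import numpy as np

rng = np.random.default_rng(0)
n_agents, dim = 6, 2
states = rng.uniform(0.0, 10.0, size=(n_agents, dim))  # initial state vectors
cov = 0.05 * np.eye(dim)                                # noise covariance (assumed)

for _ in range(50):
    # Each agent broadcasts a noise-perturbed state instead of its true state.
    shared = states + rng.multivariate_normal(np.zeros(dim), cov, size=n_agents)
    # Assumed dynamics: mix own state with the average of received noisy states.
    states = 0.5 * states + 0.5 * shared.mean(axis=0)

print(states)  # agents end up close to a common point, up to the injected noise
```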
6. arXiv:2410.19358 [pdf, other] (eess.SP)
   Joint Beamforming Design and Satellite Selection for Integrated Communication and Navigation in LEO Satellite Networks
   Authors: Jiajing Li, Binghong Liu, Mugen Peng
   Abstract: Relying on their powerful communication capabilities and rapidly changing geometric configuration, Low Earth Orbit (LEO) satellites have the potential to offer integrated communication and navigation (ICAN) services. However, the isolated resource utilization in traditional satellite communication and navigation systems has led to compromised system performance. Against this backdrop, this paper formulates a joint beamforming design and satellite selection optimization problem for the LEO-ICAN network to maximize the sum rate while simultaneously reconciling the positioning performance. A two-layer algorithm is proposed, where the beamforming design in the inner layer is solved by the difference-of-convex programming method to maximize the sum rate, and the satellite selection in the outer layer is modeled as a coalition formation game to reconcile the positioning performance. Simulation results verify the superiority of the proposed algorithms, which increase the sum rate by 16.6% and 29.3% compared with conventional beamforming and satellite selection schemes, respectively.
   Submitted 25 October, 2024; originally announced October 2024.
   Comments: This work has been accepted by GLOBECOM 2024

7. arXiv:2410.13312 [pdf] (cs.IT, eess.SP)
   Windowed Compressed Spectrum Sensing with Block sparsity
   Authors: Huiguang Zhang, Baoguo Liu
   Abstract: Compressed Spectrum Sensing (CSS) is widely employed in spectral analysis due to its sampling efficiency. However, conventional CSS assumes a standard sparse spectrum, which is affected by spectral leakage (SL). Despite the widespread use of CSS, the impact of SL on its performance has not been systematically and thoroughly investigated. This study addresses this research gap by analyzing the Restricted Isometry Property (RIP) of windowed Gaussian measurement matrices and proposing a novel block-sparse CSS model. We introduce the Edge Zeroing Coefficient (EZC) to evaluate SL suppression and RIP impact, and the Window Scaling Coefficient (WSC) to quantify the effect on the RIP. Our research investigates the influence of the window function (WF) on signal sparsity and measurement matrices, and presents a block-sparse CSS model that considers component frequency distribution, signal length, windowing, and the noise floor. Based on subspace counting theory, we derive a sample bound for our model. The findings demonstrate that while WFs reduce SL, excessively small EZC and WSC values can negatively affect RIP quality and cause numerical instability during signal reconstruction. This highlights the delicate balance required when applying WFs in CSS. Our block-sparse approach enables precise compression and reconstruction, particularly for super-sparse signals with a high noise floor. This study provides a framework for optimizing CSS performance when dealing with SL and sparse signals, offering insights for improving signal reconstruction quality in various applications.
   Submitted 17 October, 2024; originally announced October 2024.
   Comments: 36 pages, 10 figures
8. arXiv:2410.05100 [pdf, other] (cs.CV, eess.IV)
   IGroupSS-Mamba: Interval Group Spatial-Spectral Mamba for Hyperspectral Image Classification
   Authors: Yan He, Bing Tu, Puzhao Jiang, Bo Liu, Jun Li, Antonio Plaza
   Abstract: Hyperspectral image (HSI) classification has garnered substantial attention in remote sensing. Recent Mamba architectures built upon Selective State Space Models (S6) have demonstrated enormous potential in long-range sequence modeling. However, the high dimensionality of hyperspectral data and information redundancy pose challenges to the application of Mamba in HSI classification, leading to suboptimal performance and computational efficiency. In light of this, this paper investigates a lightweight Interval Group Spatial-Spectral Mamba framework (IGroupSS-Mamba) for HSI classification, which allows for multi-directional and multi-scale global spatial-spectral information extraction in a grouping and hierarchical manner. Technically, an Interval Group S6 Mechanism (IGSM) is developed as the core component, which partitions high-dimensional features into multiple non-overlapping groups at intervals, and then integrates a unidirectional S6 for each group with a specific scanning direction to achieve non-redundant sequence modeling. Compared to conventionally applying multi-directional scanning to all bands, this grouping strategy leverages the complementary strengths of different scanning directions while decreasing computational costs. To adequately capture spatial-spectral contextual information, an Interval Group Spatial-Spectral Block (IGSSB) is introduced, in which two IGSM-based spatial and spectral operators are cascaded to characterize the global spatial-spectral relationship along the spatial and spectral dimensions, respectively. IGroupSS-Mamba is constructed as a hierarchical structure stacked from multiple IGSSB blocks, integrating a pixel aggregation-based downsampling strategy for multiscale spatial-spectral semantic learning from shallow to deep stages. Extensive experiments demonstrate that IGroupSS-Mamba outperforms state-of-the-art methods.
   Submitted 7 October, 2024; originally announced October 2024.

9. arXiv:2409.08132 [pdf, other] (eess.SY)
   Optimal Management of Grid-Interactive Efficient Buildings via Safe Reinforcement Learning
   Authors: Xiang Huo, Boming Liu, Jin Dong, Jianming Lian, Mingxi Liu
   Abstract: Reinforcement learning (RL)-based methods have achieved significant success in managing grid-interactive efficient buildings (GEBs). However, RL does not carry intrinsic guarantees of constraint satisfaction, which may lead to severe safety consequences. Besides, in GEB control applications, most existing safe RL approaches rely only on the regularisation parameters in neural networks or penalties on rewards, which often encounter challenges with parameter tuning and lead to catastrophic constraint violations. To provide enforced safety guarantees in controlling GEBs, this paper designs a physics-inspired safe RL method whose decision-making is enhanced through safe interaction with the environment. Different energy resources in GEBs are optimally managed to minimize energy costs and maximize customer comfort. The proposed approach can achieve strict constraint guarantees based on prior knowledge of a set of developed hard steady-state rules. Simulations on the optimal management of GEBs, including heating, ventilation, and air conditioning (HVAC), solar photovoltaics, and energy storage systems, demonstrate the effectiveness of the proposed approach.
   Submitted 12 September, 2024; originally announced September 2024.
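   The safe-RL entry above enforces hard steady-state rules on actions rather than relying on reward penalties. A minimal, generic way to express that idea is a safety layer that projects whatever the policy proposes back into a feasible set before it reaches the building; the variables and bounds below are hypothetical, not the paper's rule set.

```python
# Minimal "safety layer" sketch: project the policy's raw action into hard
# feasibility bounds before applying it, instead of penalizing violations in
# the reward. Bounds and action layout are hypothetical.
import numpy as np

# Hypothetical limits for [HVAC setpoint (degC), battery power (kW), PV curtailment (0-1)]
ACTION_LOW = np.array([20.0, -5.0, 0.0])
ACTION_HIGH = np.array([26.0, 5.0, 1.0])

def safe_action(raw_action: np.ndarray) -> np.ndarray:
    """Project (here: clip) the raw policy output onto the safe action box."""
    return np.clip(raw_action, ACTION_LOW, ACTION_HIGH)

raw = np.array([29.3, -7.8, 0.4])   # e.g. an exploratory action during training
print(safe_action(raw))             # -> [26.  -5.   0.4]
```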
10. arXiv:2409.01694 [pdf, other] (eess.SP, math.NA)
   A novel and efficient parameter estimation of the Lognormal-Rician turbulence model based on k-Nearest Neighbor and data generation method
   Authors: Maoke Miao, Xinyu Zhang, Bo Liu, Rui Yin, Jiantao Yuan, Feng Gao, Xiao-Yu Chen
   Abstract: In this paper, we propose a novel and efficient parameter estimator based on $k$-Nearest Neighbor ($k$NN) and a data generation method for the Lognormal-Rician turbulence channel. Kolmogorov-Smirnov (KS) goodness-of-fit statistical tools are employed to investigate the validity of the $k$NN approximation under different channel conditions, and it is shown that the choice of $k$ plays a significant role in the approximation accuracy. We present several numerical results to illustrate that solving the constructed objective function can provide a reasonable estimate of the actual values. The accuracy of the proposed estimator is investigated in terms of the mean square error. The simulation results show that increasing the number of generated samples by two orders of magnitude does not lead to a significant improvement in estimation performance when solving the optimization problem by the gradient descent algorithm. However, the estimation performance under the genetic algorithm (GA) approaches that of the saddlepoint approximation and expectation-maximization estimators. Therefore, combined with the GA, we demonstrate that the proposed estimator achieves the best tradeoff between computational complexity and accuracy.
   Submitted 3 September, 2024; originally announced September 2024.
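   One ingredient the estimator entry above relies on, generating samples from a candidate parameterization and scoring the fit against observed data with a Kolmogorov-Smirnov statistic, is easy to illustrate with SciPy. The Lognormal-Rician samples below come from a crude stand-in generator (lognormal shadowing times a Rician envelope); the paper's kNN-based objective and its optimizers are not reproduced.

```python
# Generic "generate samples, score them with a KS statistic" illustration.
# The sample generator is a crude stand-in for a Lognormal-Rician channel;
# the paper's kNN-based objective function is not implemented here.
import numpy as np
from scipy import stats

rng = np.random.default_rng(1)

def lognormal_rician_samples(k_factor, sigma_db, size, rng):
    """Stand-in generator: Rician envelope scaled by lognormal shadowing."""
    shadowing = rng.lognormal(mean=0.0, sigma=sigma_db * np.log(10) / 20, size=size)
    envelope = stats.rice.rvs(b=np.sqrt(2 * k_factor), size=size, random_state=rng)
    return shadowing * envelope

observed = lognormal_rician_samples(k_factor=4.0, sigma_db=2.0, size=5000, rng=rng)

# Score one candidate parameter pair by its two-sample KS distance to the data;
# an estimator would search over (k_factor, sigma_db) to minimize such a score.
candidate = lognormal_rician_samples(k_factor=2.0, sigma_db=2.0, size=5000, rng=rng)
print(stats.ks_2samp(observed, candidate).statistic)   # larger value -> worse fit
```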
</p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2408.16277">arXiv:2408.16277</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2408.16277">pdf</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Image and Video Processing">eess.IV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> Fine-grained Classification of Port Wine Stains Using Optical Coherence Tomography Angiography </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&amp;query=Deng%2C+X">Xiaofeng Deng</a>, <a href="/search/eess?searchtype=author&amp;query=Chen%2C+D">Defu Chen</a>, <a href="/search/eess?searchtype=author&amp;query=Liu%2C+B">Bowen Liu</a>, <a href="/search/eess?searchtype=author&amp;query=Zhang%2C+X">Xiwan Zhang</a>, <a href="/search/eess?searchtype=author&amp;query=Qiu%2C+H">Haixia Qiu</a>, <a href="/search/eess?searchtype=author&amp;query=Yuan%2C+W">Wu Yuan</a>, <a href="/search/eess?searchtype=author&amp;query=Ren%2C+H">Hongliang Ren</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2408.16277v1-abstract-short" style="display: inline;"> Accurate classification of port wine stains (PWS, vascular malformations present at birth), is critical for subsequent treatment planning. However, the current method of classifying PWS based on the external skin appearance rarely reflects the underlying angiopathological heterogeneity of PWS lesions, resulting in inconsistent outcomes with the common vascular-targeted photodynamic therapy (V-PDT)&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2408.16277v1-abstract-full').style.display = 'inline'; document.getElementById('2408.16277v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2408.16277v1-abstract-full" style="display: none;"> Accurate classification of port wine stains (PWS, vascular malformations present at birth), is critical for subsequent treatment planning. However, the current method of classifying PWS based on the external skin appearance rarely reflects the underlying angiopathological heterogeneity of PWS lesions, resulting in inconsistent outcomes with the common vascular-targeted photodynamic therapy (V-PDT) treatments. Conversely, optical coherence tomography angiography (OCTA) is an ideal tool for visualizing the vascular malformations of PWS. Previous studies have shown no significant correlation between OCTA quantitative metrics and the PWS subtypes determined by the current classification approach. This study proposes a new classification approach for PWS using both OCT and OCTA. By examining the hypodermic histopathology and vascular structure of PWS, we have devised a fine-grained classification method that subdivides PWS into five distinct types. To assess the angiopathological differences of various PWS subtypes, we have analyzed six metrics related to vascular morphology and depth information of PWS lesions. The five PWS types present significant differences across all metrics compared to the conventional subtypes. 
Our findings suggest that an angiopathology-based classification accurately reflects the heterogeneity in PWS lesions. This research marks the first attempt to classify PWS based on angiopathology, potentially guiding more effective subtyping and treatment strategies for PWS. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2408.16277v1-abstract-full').style.display = 'none'; document.getElementById('2408.16277v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 29 August, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> August 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">This work has been submitted to the IEEE for possible publication</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2407.01956">arXiv:2407.01956</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2407.01956">pdf</a>, <a href="https://arxiv.org/format/2407.01956">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Systems and Control">eess.SY</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Robotics">cs.RO</span> </div> </div> <p class="title is-5 mathjax"> Cloud-Edge-Terminal Collaborative AIGC for Autonomous Driving </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&amp;query=Zhang%2C+J">Jianan Zhang</a>, <a href="/search/eess?searchtype=author&amp;query=Wei%2C+Z">Zhiwei Wei</a>, <a href="/search/eess?searchtype=author&amp;query=Liu%2C+B">Boxun Liu</a>, <a href="/search/eess?searchtype=author&amp;query=Wang%2C+X">Xiayi Wang</a>, <a href="/search/eess?searchtype=author&amp;query=Yu%2C+Y">Yong Yu</a>, <a href="/search/eess?searchtype=author&amp;query=Zhang%2C+R">Rongqing Zhang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2407.01956v1-abstract-short" style="display: inline;"> In dynamic autonomous driving environment, Artificial Intelligence-Generated Content (AIGC) technology can supplement vehicle perception and decision making by leveraging models&#39; generative and predictive capabilities, and has the potential to enhance motion planning, trajectory prediction and traffic simulation. This article proposes a cloud-edge-terminal collaborative architecture to support AIG&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2407.01956v1-abstract-full').style.display = 'inline'; document.getElementById('2407.01956v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2407.01956v1-abstract-full" style="display: none;"> In dynamic autonomous driving environment, Artificial Intelligence-Generated Content (AIGC) technology can supplement vehicle perception and decision making by leveraging models&#39; generative and predictive capabilities, and has the potential to enhance motion planning, trajectory prediction and traffic simulation. 
This article proposes a cloud-edge-terminal collaborative architecture to support AIGC for autonomous driving. By delving into the unique properties of AIGC services, this article initiates the attempts to construct mutually supportive AIGC and network systems for autonomous driving, including communication, storage and computation resource allocation schemes to support AIGC services, and leveraging AIGC to assist system design and resource management. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2407.01956v1-abstract-full').style.display = 'none'; document.getElementById('2407.01956v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 2 July, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> July 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2406.14440">arXiv:2406.14440</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2406.14440">pdf</a>, <a href="https://arxiv.org/format/2406.14440">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Signal Processing">eess.SP</span> </div> </div> <p class="title is-5 mathjax"> LLM4CP: Adapting Large Language Models for Channel Prediction </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&amp;query=Liu%2C+B">Boxun Liu</a>, <a href="/search/eess?searchtype=author&amp;query=Liu%2C+X">Xuanyu Liu</a>, <a href="/search/eess?searchtype=author&amp;query=Gao%2C+S">Shijian Gao</a>, <a href="/search/eess?searchtype=author&amp;query=Cheng%2C+X">Xiang Cheng</a>, <a href="/search/eess?searchtype=author&amp;query=Yang%2C+L">Liuqing Yang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2406.14440v1-abstract-short" style="display: inline;"> Channel prediction is an effective approach for reducing the feedback or estimation overhead in massive multi-input multi-output (m-MIMO) systems. However, existing channel prediction methods lack precision due to model mismatch errors or network generalization issues. Large language models (LLMs) have demonstrated powerful modeling and generalization abilities, and have been successfully applied&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2406.14440v1-abstract-full').style.display = 'inline'; document.getElementById('2406.14440v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2406.14440v1-abstract-full" style="display: none;"> Channel prediction is an effective approach for reducing the feedback or estimation overhead in massive multi-input multi-output (m-MIMO) systems. However, existing channel prediction methods lack precision due to model mismatch errors or network generalization issues. Large language models (LLMs) have demonstrated powerful modeling and generalization abilities, and have been successfully applied to cross-modal tasks, including the time series analysis. 
Leveraging the expressive power of LLMs, we propose a pre-trained LLM-empowered channel prediction method (LLM4CP) to predict the future downlink channel state information (CSI) sequence based on the historical uplink CSI sequence. We fine-tune the network while freezing most of the parameters of the pre-trained LLM for better cross-modality knowledge transfer. To bridge the gap between the channel data and the feature space of the LLM, preprocessor, embedding, and output modules are specifically tailored by taking into account unique channel characteristics. Simulations validate that the proposed method achieves SOTA prediction performance on full-sample, few-shot, and generalization tests with low training and inference costs. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2406.14440v1-abstract-full').style.display = 'none'; document.getElementById('2406.14440v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 20 June, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> June 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2406.13150">arXiv:2406.13150</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2406.13150">pdf</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Image and Video Processing">eess.IV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> MCAD: Multi-modal Conditioned Adversarial Diffusion Model for High-Quality PET Image Reconstruction </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&amp;query=Cui%2C+J">Jiaqi Cui</a>, <a href="/search/eess?searchtype=author&amp;query=Zeng%2C+X">Xinyi Zeng</a>, <a href="/search/eess?searchtype=author&amp;query=Zeng%2C+P">Pinxian Zeng</a>, <a href="/search/eess?searchtype=author&amp;query=Liu%2C+B">Bo Liu</a>, <a href="/search/eess?searchtype=author&amp;query=Wu%2C+X">Xi Wu</a>, <a href="/search/eess?searchtype=author&amp;query=Zhou%2C+J">Jiliu Zhou</a>, <a href="/search/eess?searchtype=author&amp;query=Wang%2C+Y">Yan Wang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2406.13150v1-abstract-short" style="display: inline;"> Radiation hazards associated with standard-dose positron emission tomography (SPET) images remain a concern, whereas the quality of low-dose PET (LPET) images fails to meet clinical requirements. Therefore, there is great interest in reconstructing SPET images from LPET images. 
However, prior studies focus solely on image data, neglecting vital complementary information from other modalities, e.g.&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2406.13150v1-abstract-full').style.display = 'inline'; document.getElementById('2406.13150v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2406.13150v1-abstract-full" style="display: none;"> Radiation hazards associated with standard-dose positron emission tomography (SPET) images remain a concern, whereas the quality of low-dose PET (LPET) images fails to meet clinical requirements. Therefore, there is great interest in reconstructing SPET images from LPET images. However, prior studies focus solely on image data, neglecting vital complementary information from other modalities, e.g., patients&#39; clinical tabular, resulting in compromised reconstruction with limited diagnostic utility. Moreover, they often overlook the semantic consistency between real SPET and reconstructed images, leading to distorted semantic contexts. To tackle these problems, we propose a novel Multi-modal Conditioned Adversarial Diffusion model (MCAD) to reconstruct SPET images from multi-modal inputs, including LPET images and clinical tabular. Specifically, our MCAD incorporates a Multi-modal conditional Encoder (Mc-Encoder) to extract multi-modal features, followed by a conditional diffusion process to blend noise with multi-modal features and gradually map blended features to the target SPET images. To balance multi-modal inputs, the Mc-Encoder embeds Optimal Multi-modal Transport co-Attention (OMTA) to narrow the heterogeneity gap between image and tabular while capturing their interactions, providing sufficient guidance for reconstruction. In addition, to mitigate semantic distortions, we introduce the Multi-Modal Masked Text Reconstruction (M3TRec), which leverages semantic knowledge extracted from denoised PET images to restore the masked clinical tabular, thereby compelling the network to maintain accurate semantics during reconstruction. To expedite the diffusion process, we further introduce an adversarial diffusive network with a reduced number of diffusion steps. Experiments show that our method achieves the state-of-the-art performance both qualitatively and quantitatively. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2406.13150v1-abstract-full').style.display = 'none'; document.getElementById('2406.13150v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 18 June, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> June 2024. 
</p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Early accepted by MICCAI2024</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2406.12707">arXiv:2406.12707</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2406.12707">pdf</a>, <a href="https://arxiv.org/format/2406.12707">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Sound">cs.SD</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Audio and Speech Processing">eess.AS</span> </div> </div> <p class="title is-5 mathjax"> Talk With Human-like Agents: Empathetic Dialogue Through Perceptible Acoustic Reception and Reaction </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&amp;query=Yan%2C+H">Haoqiu Yan</a>, <a href="/search/eess?searchtype=author&amp;query=Zhu%2C+Y">Yongxin Zhu</a>, <a href="/search/eess?searchtype=author&amp;query=Zheng%2C+K">Kai Zheng</a>, <a href="/search/eess?searchtype=author&amp;query=Liu%2C+B">Bing Liu</a>, <a href="/search/eess?searchtype=author&amp;query=Cao%2C+H">Haoyu Cao</a>, <a href="/search/eess?searchtype=author&amp;query=Jiang%2C+D">Deqiang Jiang</a>, <a href="/search/eess?searchtype=author&amp;query=Xu%2C+L">Linli Xu</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2406.12707v1-abstract-short" style="display: inline;"> Large Language Model (LLM)-enhanced agents become increasingly prevalent in Human-AI communication, offering vast potential from entertainment to professional domains. However, current multi-modal dialogue systems overlook the acoustic information present in speech, which is crucial for understanding human communication nuances. This oversight can lead to misinterpretations of speakers&#39; intentions&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2406.12707v1-abstract-full').style.display = 'inline'; document.getElementById('2406.12707v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2406.12707v1-abstract-full" style="display: none;"> Large Language Model (LLM)-enhanced agents become increasingly prevalent in Human-AI communication, offering vast potential from entertainment to professional domains. However, current multi-modal dialogue systems overlook the acoustic information present in speech, which is crucial for understanding human communication nuances. This oversight can lead to misinterpretations of speakers&#39; intentions, resulting in inconsistent or even contradictory responses within dialogues. To bridge this gap, in this paper, we propose PerceptiveAgent, an empathetic multi-modal dialogue system designed to discern deeper or more subtle meanings beyond the literal interpretations of words through the integration of speech modality perception. 
Employing LLMs as a cognitive core, PerceptiveAgent perceives acoustic information from input speech and generates empathetic responses based on speaking styles described in natural language. Experimental results indicate that PerceptiveAgent excels in contextual understanding by accurately discerning the speakers&#39; true intentions in scenarios where the linguistic meaning is either contrary to or inconsistent with the speaker&#39;s true feelings, producing more nuanced and expressive spoken dialogues. Code is publicly available at: \url{https://github.com/Haoqiu-Yan/PerceptiveAgent}. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2406.12707v1-abstract-full').style.display = 'none'; document.getElementById('2406.12707v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 18 June, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> June 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">9 pages, 3 figures, ACL24 accepted</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2406.05359">arXiv:2406.05359</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2406.05359">pdf</a>, <a href="https://arxiv.org/format/2406.05359">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Audio and Speech Processing">eess.AS</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Sound">cs.SD</span> </div> </div> <p class="title is-5 mathjax"> Towards Lightweight Speaker Verification via Adaptive Neural Network Quantization </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&amp;query=Liu%2C+B">Bei Liu</a>, <a href="/search/eess?searchtype=author&amp;query=Wang%2C+H">Haoyu Wang</a>, <a href="/search/eess?searchtype=author&amp;query=Qian%2C+Y">Yanmin Qian</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2406.05359v3-abstract-short" style="display: inline;"> Modern speaker verification (SV) systems typically demand expensive storage and computing resources, thereby hindering their deployment on mobile devices. In this paper, we explore adaptive neural network quantization for lightweight speaker verification. Firstly, we propose a novel adaptive uniform precision quantization method which enables the dynamic generation of quantization centroids custom&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2406.05359v3-abstract-full').style.display = 'inline'; document.getElementById('2406.05359v3-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2406.05359v3-abstract-full" style="display: none;"> Modern speaker verification (SV) systems typically demand expensive storage and computing resources, thereby hindering their deployment on mobile devices. In this paper, we explore adaptive neural network quantization for lightweight speaker verification. 
Firstly, we propose a novel adaptive uniform precision quantization method which enables the dynamic generation of quantization centroids customized for each network layer based on k-means clustering. By applying it to the pre-trained SV systems, we obtain a series of quantized variants with different bit widths. To enhance the performance of low-bit quantized models, a mixed precision quantization algorithm along with a multi-stage fine-tuning (MSFT) strategy is further introduced. Unlike uniform precision quantization, mixed precision approach allows for the assignment of varying bit widths to different network layers. When bit combination is determined, MSFT is employed to progressively quantize and fine-tune network in a specific order. Finally, we design two distinct binary quantization schemes to mitigate performance degradation of 1-bit quantized models: the static and adaptive quantizers. Experiments on VoxCeleb demonstrate that lossless 4-bit uniform precision quantization is achieved on both ResNets and DF-ResNets, yielding a promising compression ratio of around 8. Moreover, compared to uniform precision approach, mixed precision quantization not only obtains additional performance improvements with a similar model size but also offers the flexibility to generate bit combination for any desirable model size. In addition, our suggested 1-bit quantization schemes remarkably boost the performance of binarized models. Finally, a thorough comparison with existing lightweight SV systems reveals that our proposed models outperform all previous methods by a large margin across various model size ranges. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2406.05359v3-abstract-full').style.display = 'none'; document.getElementById('2406.05359v3-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 21 July, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 8 June, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> June 2024. 
</p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">IEEE/ACM Transactions on Audio, Speech, and Language Processing</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2405.12487">arXiv:2405.12487</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2405.12487">pdf</a>, <a href="https://arxiv.org/format/2405.12487">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Image and Video Processing">eess.IV</span> </div> </div> <p class="title is-5 mathjax"> 3DSS-Mamba: 3D-Spectral-Spatial Mamba for Hyperspectral Image Classification </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&amp;query=He%2C+Y">Yan He</a>, <a href="/search/eess?searchtype=author&amp;query=Tu%2C+B">Bing Tu</a>, <a href="/search/eess?searchtype=author&amp;query=Liu%2C+B">Bo Liu</a>, <a href="/search/eess?searchtype=author&amp;query=Li%2C+J">Jun Li</a>, <a href="/search/eess?searchtype=author&amp;query=Plaza%2C+A">Antonio Plaza</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2405.12487v2-abstract-short" style="display: inline;"> Hyperspectral image (HSI) classification constitutes the fundamental research in remote sensing fields. Convolutional Neural Networks (CNNs) and Transformers have demonstrated impressive capability in capturing spectral-spatial contextual dependencies. However, these architectures suffer from limited receptive fields and quadratic computational complexity, respectively. Fortunately, recent Mamba a&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2405.12487v2-abstract-full').style.display = 'inline'; document.getElementById('2405.12487v2-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2405.12487v2-abstract-full" style="display: none;"> Hyperspectral image (HSI) classification constitutes the fundamental research in remote sensing fields. Convolutional Neural Networks (CNNs) and Transformers have demonstrated impressive capability in capturing spectral-spatial contextual dependencies. However, these architectures suffer from limited receptive fields and quadratic computational complexity, respectively. Fortunately, recent Mamba architectures built upon the State Space Model integrate the advantages of long-range sequence modeling and linear computational efficiency, exhibiting substantial potential in low-dimensional scenarios. Motivated by this, we propose a novel 3D-Spectral-Spatial Mamba (3DSS-Mamba) framework for HSI classification, allowing for global spectral-spatial relationship modeling with greater computational efficiency. Technically, a spectral-spatial token generation (SSTG) module is designed to convert the HSI cube into a set of 3D spectral-spatial tokens. 
To overcome the limitations of traditional Mamba, which is confined to modeling causal sequences and inadaptable to high-dimensional scenarios, a 3D-Spectral-Spatial Selective Scanning (3DSS) mechanism is introduced, which performs pixel-wise selective scanning on 3D hyperspectral tokens along the spectral and spatial dimensions. Five scanning routes are constructed to investigate the impact of dimension prioritization. The 3DSS scanning mechanism combined with conventional mapping operations forms the 3D-spectral-spatial mamba block (3DMB), enabling the extraction of global spectral-spatial semantic representations. Experimental results and analysis demonstrate that the proposed method outperforms the state-of-the-art methods on HSI classification benchmarks. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2405.12487v2-abstract-full').style.display = 'none'; document.getElementById('2405.12487v2-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 8 August, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 21 May, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> May 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2405.09778">arXiv:2405.09778</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2405.09778">pdf</a>, <a href="https://arxiv.org/format/2405.09778">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Signal Processing">eess.SP</span> </div> </div> <p class="title is-5 mathjax"> Beam Pattern Modulation Embedded Hybrid Transceiver Optimization for Integrated Sensing and Communication </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&amp;query=Liu%2C+B">Boxun Liu</a>, <a href="/search/eess?searchtype=author&amp;query=Gao%2C+S">Shijian Gao</a>, <a href="/search/eess?searchtype=author&amp;query=Yang%2C+Z">Zonghui Yang</a>, <a href="/search/eess?searchtype=author&amp;query=Cheng%2C+X">Xiang Cheng</a>, <a href="/search/eess?searchtype=author&amp;query=Yang%2C+L">Liuqing Yang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2405.09778v1-abstract-short" style="display: inline;"> Integrated sensing and communication (ISAC) emerges as a promising technology for B5G/6G, particularly in the millimeter-wave (mmWave) band. However, the widely utilized hybrid architecture in mmWave systems compromises multiplexing gain due to the constraints of limited radio frequency chains. Moreover, additional sensing functionalities exacerbate the impairment of spectrum efficiency (SE). In t&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2405.09778v1-abstract-full').style.display = 'inline'; document.getElementById('2405.09778v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2405.09778v1-abstract-full" style="display: none;"> Integrated sensing and communication (ISAC) emerges as a promising technology for B5G/6G, particularly in the millimeter-wave (mmWave) band. 
However, the widely utilized hybrid architecture in mmWave systems compromises multiplexing gain due to the constraints of limited radio frequency chains. Moreover, additional sensing functionalities exacerbate the impairment of spectrum efficiency (SE). In this paper, we present an optimized beam pattern modulation-embedded ISAC (BPM-ISAC) transceiver design, which spares one RF chain for sensing and the others for communication. To compensate for the reduced SE, index modulation across communication beams is applied. We formulate an optimization problem aimed at minimizing the mean squared error (MSE) of the sensing beampattern, subject to a symbol MSE constraint. This problem is then solved by sequentially optimizing the analog and digital parts. Both the multi-aperture structure (MAS) and the multi-beam structure (MBS) are considered for the design of the analog part. We conduct theoretical analysis on the asymptotic pairwise error probability (APEP) and the Cramér-Rao bound (CRB) of direction of arrival (DoA) estimation. Numerical simulations validate the overall enhanced ISAC performance over existing alternatives. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2405.09778v1-abstract-full').style.display = 'none'; document.getElementById('2405.09778v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 15 May, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> May 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2405.09298">arXiv:2405.09298</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2405.09298">pdf</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Image and Video Processing">eess.IV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> Deep Blur Multi-Model (DeepBlurMM) -- a strategy to mitigate the impact of image blur on deep learning model performance in histopathology image analysis </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&amp;query=Xiang%2C+Y">Yujie Xiang</a>, <a href="/search/eess?searchtype=author&amp;query=Liu%2C+B">Bojing Liu</a>, <a href="/search/eess?searchtype=author&amp;query=Rantalainen%2C+M">Mattias Rantalainen</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2405.09298v3-abstract-short" style="display: inline;"> AI-based analysis of histopathology whole slide images (WSIs) is central in computational pathology. However, image quality, including unsharp areas of WSIs, impacts model performance. We investigate the impact of blur and propose a multi-model approach to mitigate negative impact of unsharp image areas. 
In this study, we use a simulation approach, evaluating model performance under varying levels&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2405.09298v3-abstract-full').style.display = 'inline'; document.getElementById('2405.09298v3-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2405.09298v3-abstract-full" style="display: none;"> AI-based analysis of histopathology whole slide images (WSIs) is central in computational pathology. However, image quality, including unsharp areas of WSIs, impacts model performance. We investigate the impact of blur and propose a multi-model approach to mitigate negative impact of unsharp image areas. In this study, we use a simulation approach, evaluating model performance under varying levels of added Gaussian blur to image tiles from &gt;900 H&amp;E-stained breast cancer WSIs. To reduce impact of blur, we propose a novel multi-model approach (DeepBlurMM) where multiple models trained on data with variable amounts of Gaussian blur are used to predict tiles based on their blur levels. Using histological grade as a principal example, we found that models trained with mildly blurred tiles improved performance over the base model when moderate-high blur was present. DeepBlurMM outperformed the base model in presence of moderate blur across all tiles (AUC:0.764 vs. 0.710), and in presence of a mix of low, moderate, and high blur across tiles (AUC:0.821 vs. 0.789). Unsharp image tiles in WSIs impact prediction performance. DeepBlurMM improved prediction performance under some conditions and has the potential to increase quality in both research and clinical applications. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2405.09298v3-abstract-full').style.display = 'none'; document.getElementById('2405.09298v3-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 23 May, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 15 May, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> May 2024. 
</p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">ACM Class:</span> I.4; J.3 </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2404.17736">arXiv:2404.17736</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2404.17736">pdf</a>, <a href="https://arxiv.org/format/2404.17736">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Signal Processing">eess.SP</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Information Theory">cs.IT</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Image and Video Processing">eess.IV</span> </div> </div> <p class="title is-5 mathjax"> Diffusion-Aided Joint Source Channel Coding For High Realism Wireless Image Transmission </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&amp;query=Yang%2C+M">Mingyu Yang</a>, <a href="/search/eess?searchtype=author&amp;query=Liu%2C+B">Bowen Liu</a>, <a href="/search/eess?searchtype=author&amp;query=Wang%2C+B">Boyang Wang</a>, <a href="/search/eess?searchtype=author&amp;query=Kim%2C+H">Hun-Seok Kim</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2404.17736v2-abstract-short" style="display: inline;"> Deep learning-based joint source-channel coding (deep JSCC) has been demonstrated to be an effective approach for wireless image transmission. Nevertheless, most existing work adopts an autoencoder framework to optimize conventional criteria such as Mean Squared Error (MSE) and Structural Similarity Index (SSIM) which do not suffice to maintain the perceptual quality of reconstructed images. Such&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2404.17736v2-abstract-full').style.display = 'inline'; document.getElementById('2404.17736v2-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2404.17736v2-abstract-full" style="display: none;"> Deep learning-based joint source-channel coding (deep JSCC) has been demonstrated to be an effective approach for wireless image transmission. Nevertheless, most existing work adopts an autoencoder framework to optimize conventional criteria such as Mean Squared Error (MSE) and Structural Similarity Index (SSIM) which do not suffice to maintain the perceptual quality of reconstructed images. Such an issue is more prominent under stringent bandwidth constraints or low signal-to-noise ratio (SNR) conditions. To tackle this challenge, we propose DiffJSCC, a novel framework that leverages the prior knowledge of the pre-trained Statble Diffusion model to produce high-realism images via the conditional diffusion denoising process. Our DiffJSCC first extracts multimodal spatial and textual features from the noisy channel symbols in the generation phase. Then, it produces an initial reconstructed image as an intermediate representation to aid robust feature extraction and a stable training process. 
In the following diffusion step, DiffJSCC uses the derived multimodal features, together with channel state information such as the signal-to-noise ratio (SNR), as conditions to guide the denoising diffusion process, which converts the initial random noise to the final reconstruction. DiffJSCC employs a novel control module to fine-tune the Stable Diffusion model and adjust it to the multimodal conditions. Extensive experiments on diverse datasets reveal that our method significantly surpasses prior deep JSCC approaches on both perceptual metrics and downstream task performance, showcasing its ability to preserve the semantics of the original transmitted images. Notably, DiffJSCC can achieve highly realistic reconstructions for 768x512 pixel Kodak images with only 3072 symbols (&lt;0.008 symbols per pixel) under 1dB SNR channels. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2404.17736v2-abstract-full').style.display = 'none'; document.getElementById('2404.17736v2-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 17 July, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 26 April, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> April 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2404.07556">arXiv:2404.07556</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2404.07556">pdf</a>, <a href="https://arxiv.org/format/2404.07556">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Image and Video Processing">eess.IV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> Attention-Aware Laparoscopic Image Desmoking Network with Lightness Embedding and Hybrid Guided Embedding </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&amp;query=Liu%2C+Z">Ziteng Liu</a>, <a href="/search/eess?searchtype=author&amp;query=Zhu%2C+J">Jiahua Zhu</a>, <a href="/search/eess?searchtype=author&amp;query=Liu%2C+B">Bainan Liu</a>, <a href="/search/eess?searchtype=author&amp;query=Liu%2C+H">Hao Liu</a>, <a href="/search/eess?searchtype=author&amp;query=Gao%2C+W">Wenpeng Gao</a>, <a href="/search/eess?searchtype=author&amp;query=Fu%2C+Y">Yili Fu</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2404.07556v1-abstract-short" style="display: inline;"> This paper presents a novel method of smoke removal from the laparoscopic images. Due to the heterogeneous nature of surgical smoke, a two-stage network is proposed to estimate the smoke distribution and reconstruct a clear, smoke-free surgical scene. The utilization of the lightness channel plays a pivotal role in providing vital information pertaining to smoke density. 
The reconstruction of smok&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2404.07556v1-abstract-full').style.display = 'inline'; document.getElementById('2404.07556v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2404.07556v1-abstract-full" style="display: none;"> This paper presents a novel method of smoke removal from the laparoscopic images. Due to the heterogeneous nature of surgical smoke, a two-stage network is proposed to estimate the smoke distribution and reconstruct a clear, smoke-free surgical scene. The utilization of the lightness channel plays a pivotal role in providing vital information pertaining to smoke density. The reconstruction of smoke-free image is guided by a hybrid embedding, which combines the estimated smoke mask with the initial image. Experimental results demonstrate that the proposed method boasts a Peak Signal to Noise Ratio that is $2.79\%$ higher than the state-of-the-art methods, while also exhibits a remarkable $38.2\%$ reduction in run-time. Overall, the proposed method offers comparable or even superior performance in terms of both smoke removal quality and computational efficiency when compared to existing state-of-the-art methods. This work will be publicly available on http://homepage.hit.edu.cn/wpgao <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2404.07556v1-abstract-full').style.display = 'none'; document.getElementById('2404.07556v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 11 April, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> April 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">ISBI2024</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2403.15418">arXiv:2403.15418</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2403.15418">pdf</a>, <a href="https://arxiv.org/format/2403.15418">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Signal Processing">eess.SP</span> </div> </div> <p class="title is-5 mathjax"> Stochastic Analysis of Touch-Tone Frequency Recognition in Two-Way Radio Systems for Dialed Telephone Number Identification </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&amp;query=Yu%2C+L">Liqiang Yu</a>, <a href="/search/eess?searchtype=author&amp;query=Li%2C+C">Chen Li</a>, <a href="/search/eess?searchtype=author&amp;query=Liu%2C+B">Bo Liu</a>, <a href="/search/eess?searchtype=author&amp;query=Che%2C+C">Chang Che</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2403.15418v1-abstract-short" style="display: inline;"> This paper focuses on recognizing dialed numbers in a touch-tone telephone system based on the Dual Tone MultiFrequency (DTMF) signaling technique with analysis of stochastic aspects during the noise and random duration of characters. 
Each dialed digit&#39;s acoustic profile is derived from a composite of two carrier frequencies, distinctly assigned to represent that digit. The identification of each&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2403.15418v1-abstract-full').style.display = 'inline'; document.getElementById('2403.15418v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2403.15418v1-abstract-full" style="display: none;"> This paper focuses on recognizing dialed numbers in a touch-tone telephone system based on the Dual Tone MultiFrequency (DTMF) signaling technique with analysis of stochastic aspects during the noise and random duration of characters. Each dialed digit&#39;s acoustic profile is derived from a composite of two carrier frequencies, distinctly assigned to represent that digit. The identification of each digit is achieved by pinpointing the frequency pair with the highest energy or amplitude in its spectral output, utilizing the Discrete-Time Fourier Transform (DTFT). This analysis includes simulations that illustrate the effects of introducing stochastic variations during the &#34;Mark&#34; and &#34;Space&#34; intervals of the decoding process, offering insights into the technique&#39;s efficacy and the impact of random temporal fluctuations. This will reduce the accuracy of decoder decoding and lower the SNR of the system. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2403.15418v1-abstract-full').style.display = 'none'; document.getElementById('2403.15418v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 8 March, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> March 2024. 
</p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">It is accepted by The 7th International Conference on Advanced Algorithms and Control Engineering (ICAACE 2024)</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2403.15238">arXiv:2403.15238</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2403.15238">pdf</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Image and Video Processing">eess.IV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Methodology">stat.ME</span> </div> </div> <p class="title is-5 mathjax"> WEEP: A method for spatial interpretation of weakly supervised CNN models in computational pathology </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&amp;query=Sharma%2C+A">Abhinav Sharma</a>, <a href="/search/eess?searchtype=author&amp;query=Liu%2C+B">Bojing Liu</a>, <a href="/search/eess?searchtype=author&amp;query=Rantalainen%2C+M">Mattias Rantalainen</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2403.15238v3-abstract-short" style="display: inline;"> Deep learning enables the modelling of high-resolution histopathology whole-slide images (WSI). Weakly supervised learning of tile-level data is typically applied for tasks where labels only exist on the patient or WSI level (e.g. patient outcomes or histological grading). In this context, there is a need for improved spatial interpretability of predictions from such models. We propose a novel met&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2403.15238v3-abstract-full').style.display = 'inline'; document.getElementById('2403.15238v3-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2403.15238v3-abstract-full" style="display: none;"> Deep learning enables the modelling of high-resolution histopathology whole-slide images (WSI). Weakly supervised learning of tile-level data is typically applied for tasks where labels only exist on the patient or WSI level (e.g. patient outcomes or histological grading). In this context, there is a need for improved spatial interpretability of predictions from such models. We propose a novel method, Wsi rEgion sElection aPproach (WEEP), for model interpretation. It provides a principled yet straightforward way to establish the spatial area of WSI required for assigning a particular prediction label. We demonstrate WEEP on a binary classification task in the area of breast cancer computational pathology. WEEP is easy to implement, is directly connected to the model-based decision process, and offers information relevant to both research and diagnostic applications. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2403.15238v3-abstract-full').style.display = 'none'; document.getElementById('2403.15238v3-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 2 October, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 22 March, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> March 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2403.13290">arXiv:2403.13290</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2403.13290">pdf</a>, <a href="https://arxiv.org/format/2403.13290">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Optimization and Control">math.OC</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Systems and Control">eess.SY</span> </div> </div> <p class="title is-5 mathjax"> A Log-domain Interior Point Method for Convex Quadratic Games </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&amp;query=Liu%2C+B">Bingqi Liu</a>, <a href="/search/eess?searchtype=author&amp;query=Liao-McPherson%2C+D">Dominic Liao-McPherson</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2403.13290v1-abstract-short" style="display: inline;"> In this paper, we propose an equilibrium-seeking algorithm for finding generalized Nash equilibria of non-cooperative monotone convex quadratic games. Specifically, we recast the Nash equilibrium-seeking problem as variational inequality problem that we solve using a log-domain interior point method and provide a general purpose solver based on this algorithm. This approach is suitable for non-pot&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2403.13290v1-abstract-full').style.display = 'inline'; document.getElementById('2403.13290v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2403.13290v1-abstract-full" style="display: none;"> In this paper, we propose an equilibrium-seeking algorithm for finding generalized Nash equilibria of non-cooperative monotone convex quadratic games. Specifically, we recast the Nash equilibrium-seeking problem as variational inequality problem that we solve using a log-domain interior point method and provide a general purpose solver based on this algorithm. This approach is suitable for non-potential, general sum games and does not require extensive structural assumptions. We demonstrate the efficiency and versatility of our method using three benchmark games and demonstrate our algorithm is especially effective on small to medium scale problems. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2403.13290v1-abstract-full').style.display = 'none'; document.getElementById('2403.13290v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 20 March, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> March 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2403.10417">arXiv:2403.10417</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2403.10417">pdf</a>, <a href="https://arxiv.org/format/2403.10417">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Signal Processing">eess.SP</span> </div> </div> <p class="title is-5 mathjax"> Beam Pattern Modulation Embedded mmWave Hybrid Transceiver Design Towards ISAC </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&amp;query=Liu%2C+B">Boxun Liu</a>, <a href="/search/eess?searchtype=author&amp;query=Gao%2C+S">Shijian Gao</a>, <a href="/search/eess?searchtype=author&amp;query=Yang%2C+Z">Zonghui Yang</a>, <a href="/search/eess?searchtype=author&amp;query=Cheng%2C+X">Xiang Cheng</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2403.10417v1-abstract-short" style="display: inline;"> Integrated Sensing and Communication (ISAC) emerges as a promising technology for B5G/6G, particularly in the millimeter-wave (mmWave) band. However, the widespread adoption of hybrid architecture in mmWave systems compromises multiplexing gain due to limited radio-frequency chains, resulting in mediocre performance when embedding sensing functionality. To avoid sacrificing the spectrum efficiency&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2403.10417v1-abstract-full').style.display = 'inline'; document.getElementById('2403.10417v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2403.10417v1-abstract-full" style="display: none;"> Integrated Sensing and Communication (ISAC) emerges as a promising technology for B5G/6G, particularly in the millimeter-wave (mmWave) band. However, the widespread adoption of hybrid architecture in mmWave systems compromises multiplexing gain due to limited radio-frequency chains, resulting in mediocre performance when embedding sensing functionality. To avoid sacrificing the spectrum efficiency in hybrid structures while addressing performance bottlenecks in its extension to ISAC, we present an optimized beam pattern modulation-embedded ISAC (BPM-ISAC). BPM-ISAC applies index modulation over beamspace by selectively activating communication beams, aiming to minimize sensing beampattern mean squared error (MSE) under communication MSE constraints through dedicated hybrid transceiver design. Optimization involves the analog part through a min-MSE-based beam selection algorithm, followed by the digital part using an alternating optimization algorithm. Convergence and asymptotic pairwise error probability (APEP) analyses accompany numerical simulations, validating its overall enhanced ISAC performance over existing alternatives. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2403.10417v1-abstract-full').style.display = 'none'; document.getElementById('2403.10417v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 15 March, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> March 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2402.09442">arXiv:2402.09442</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2402.09442">pdf</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Signal Processing">eess.SP</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> </div> </div> <p class="title is-5 mathjax"> Progress in artificial intelligence applications based on the combination of self-driven sensors and deep learning </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&amp;query=Wan%2C+W">Weixiang Wan</a>, <a href="/search/eess?searchtype=author&amp;query=Sun%2C+W">Wenjian Sun</a>, <a href="/search/eess?searchtype=author&amp;query=Zeng%2C+Q">Qiang Zeng</a>, <a href="/search/eess?searchtype=author&amp;query=Pan%2C+L">Linying Pan</a>, <a href="/search/eess?searchtype=author&amp;query=Xu%2C+J">Jingyu Xu</a>, <a href="/search/eess?searchtype=author&amp;query=Liu%2C+B">Bo Liu</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2402.09442v3-abstract-short" style="display: inline;"> In the era of Internet of Things, how to develop a smart sensor system with sustainable power supply, easy deployment and flexible use has become a difficult problem to be solved. The traditional power supply has problems such as frequent replacement or charging when in use, which limits the development of wearable devices. The contact-to-separate friction nanogenerator (TENG) was prepared by usin&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2402.09442v3-abstract-full').style.display = 'inline'; document.getElementById('2402.09442v3-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2402.09442v3-abstract-full" style="display: none;"> In the era of Internet of Things, how to develop a smart sensor system with sustainable power supply, easy deployment and flexible use has become a difficult problem to be solved. The traditional power supply has problems such as frequent replacement or charging when in use, which limits the development of wearable devices. The contact-to-separate friction nanogenerator (TENG) was prepared by using polychotomy thy lene (PTFE) and aluminum (AI) foils. Human motion energy was collected by human body arrangement, and human motion posture was monitored according to the changes of output electrical signals. In 2012, Academician Wang Zhong lin and his team invented the triboelectric nanogenerator (TENG), which uses Maxwell displacement current as a driving force to directly convert mechanical stimuli into electrical signals, so it can be used as a self-driven sensor. 
TENG-based sensors have the advantages of simple structure and high instantaneous power density, which provides an important means for building intelligent sensor systems. At the same time, machine learning, as a technology with low cost, short development cycle, strong data processing ability and prediction ability, has a significant effect on the processing of a large number of electrical signals generated by TENG, and the combination with TENG sensors will promote the rapid development of intelligent sensor networks in the future. Therefore, this paper is based on the intelligent sound monitoring and recognition system of TENG, which has good sound recognition capability, and aims to evaluate the feasibility of the sound perception module architecture in ubiquitous sensor networks. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2402.09442v3-abstract-full').style.display = 'none'; document.getElementById('2402.09442v3-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 12 March, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 30 January, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">This article was accepted by an IEEE conference</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2402.04267">arXiv:2402.04267</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2402.04267">pdf</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Medical Physics">physics.med-ph</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Image and Video Processing">eess.IV</span> </div> <div class="is-inline-block" style="margin-left: 0.5rem"> <div class="tags has-addons"> <span class="tag is-dark is-size-7">doi</span> <span class="tag is-light is-size-7"><a class="" href="https://doi.org/10.54097/LAwfJzEA">10.54097/LAwfJzEA <i class="fa fa-external-link" aria-hidden="true"></i></a></span> </div> </div> </div> <p class="title is-5 mathjax"> Application analysis of ai technology combined with spiral CT scanning in early lung cancer screening </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&amp;query=Li%2C+S">Shulin Li</a>, <a href="/search/eess?searchtype=author&amp;query=Yu%2C+L">Liqiang Yu</a>, <a href="/search/eess?searchtype=author&amp;query=Liu%2C+B">Bo Liu</a>, <a href="/search/eess?searchtype=author&amp;query=Lin%2C+Q">Qunwei Lin</a>, <a href="/search/eess?searchtype=author&amp;query=Huang%2C+J">Jiaxin Huang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2402.04267v1-abstract-short" style="display: inline;"> At present, the incidence and fatality rate of lung cancer in China rank first among all malignant tumors. 
Despite the continuous development and improvement of China&#39;s medical level, the overall 5-year survival rate of lung cancer patients is still lower than 20% and varies by stage. A number of studies have confirmed that early diagnosis and treatment of early stage lung cancer is of great significanc&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2402.04267v1-abstract-full').style.display = 'inline'; document.getElementById('2402.04267v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2402.04267v1-abstract-full" style="display: none;"> At present, the incidence and fatality rate of lung cancer in China rank first among all malignant tumors. Despite the continuous development and improvement of China&#39;s medical level, the overall 5-year survival rate of lung cancer patients is still lower than 20% and varies by stage. A number of studies have confirmed that early diagnosis and treatment of early stage lung cancer is of great significance to improve the prognosis of patients. In recent years, artificial intelligence technology has gradually begun to be applied in oncology. AI is used in cancer screening, clinical diagnosis, radiation therapy (image acquisition, at-risk organ segmentation, image calibration and delivery), and other rapidly developing areas. However, whether medical AI can be widely adopted depends to a certain extent on the public&#39;s attitude and acceptance. At present, there are few studies on the diagnosis of early lung cancer by AI technology combined with SCT scanning. In view of this, this study applied the combined method in early lung cancer screening, aiming to find a safe and efficient screening mode and provide a reference for clinical diagnosis and treatment. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2402.04267v1-abstract-full').style.display = 'none'; document.getElementById('2402.04267v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 26 January, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">This article was accepted by Frontiers in Computing and Intelligent Systems https://drpress.org/ojs/index.php/fcis/article/view/15781. 
arXiv admin note: text overlap with arXiv:nlin/0508031 by other authors</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2402.02327">arXiv:2402.02327</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2402.02327">pdf</a>, <a href="https://arxiv.org/format/2402.02327">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Sound">cs.SD</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Audio and Speech Processing">eess.AS</span> </div> </div> <p class="title is-5 mathjax"> Bootstrapping Audio-Visual Segmentation by Strengthening Audio Cues </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&amp;query=Chen%2C+T">Tianxiang Chen</a>, <a href="/search/eess?searchtype=author&amp;query=Tan%2C+Z">Zhentao Tan</a>, <a href="/search/eess?searchtype=author&amp;query=Gong%2C+T">Tao Gong</a>, <a href="/search/eess?searchtype=author&amp;query=Chu%2C+Q">Qi Chu</a>, <a href="/search/eess?searchtype=author&amp;query=Wu%2C+Y">Yue Wu</a>, <a href="/search/eess?searchtype=author&amp;query=Liu%2C+B">Bin Liu</a>, <a href="/search/eess?searchtype=author&amp;query=Lu%2C+L">Le Lu</a>, <a href="/search/eess?searchtype=author&amp;query=Ye%2C+J">Jieping Ye</a>, <a href="/search/eess?searchtype=author&amp;query=Yu%2C+N">Nenghai Yu</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2402.02327v2-abstract-short" style="display: inline;"> How to effectively interact audio with vision has garnered considerable interest within the multi-modality research field. Recently, a novel audio-visual segmentation (AVS) task has been proposed, aiming to segment the sounding objects in video frames under the guidance of audio cues. However, most existing AVS methods are hindered by a modality imbalance where the visual features tend to dominate&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2402.02327v2-abstract-full').style.display = 'inline'; document.getElementById('2402.02327v2-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2402.02327v2-abstract-full" style="display: none;"> How to effectively interact audio with vision has garnered considerable interest within the multi-modality research field. Recently, a novel audio-visual segmentation (AVS) task has been proposed, aiming to segment the sounding objects in video frames under the guidance of audio cues. However, most existing AVS methods are hindered by a modality imbalance where the visual features tend to dominate those of the audio modality, due to a unidirectional and insufficient integration of audio cues. This imbalance skews the feature representation towards the visual aspect, impeding the learning of joint audio-visual representations and potentially causing segmentation inaccuracies. To address this issue, we propose AVSAC. Our approach features a Bidirectional Audio-Visual Decoder (BAVD) with integrated bidirectional bridges, enhancing audio cues and fostering continuous interplay between audio and visual modalities. 
This bidirectional interaction narrows the modality imbalance, facilitating more effective learning of integrated audio-visual representations. Additionally, we present a strategy for audio-visual frame-wise synchrony as fine-grained guidance of BAVD. This strategy enhances the share of auditory components in visual features, contributing to a more balanced audio-visual representation learning. Extensive experiments show that our method attains new benchmarks in AVS performance. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2402.02327v2-abstract-full').style.display = 'none'; document.getElementById('2402.02327v2-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 6 February, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 3 February, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2401.06272">arXiv:2401.06272</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2401.06272">pdf</a>, <a href="https://arxiv.org/format/2401.06272">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Image and Video Processing">eess.IV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> Segmentation of Mediastinal Lymph Nodes in CT with Anatomical Priors </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&amp;query=Mathai%2C+T+S">Tejas Sudharshan Mathai</a>, <a href="/search/eess?searchtype=author&amp;query=Liu%2C+B">Bohan Liu</a>, <a href="/search/eess?searchtype=author&amp;query=Summers%2C+R+M">Ronald M. Summers</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2401.06272v1-abstract-short" style="display: inline;"> Purpose: Lymph nodes (LNs) in the chest have a tendency to enlarge due to various pathologies, such as lung cancer or pneumonia. Clinicians routinely measure nodal size to monitor disease progression, confirm metastatic cancer, and assess treatment response. However, variations in their shapes and appearances make it cumbersome to identify LNs, which reside outside of most organs. Methods: We prop&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2401.06272v1-abstract-full').style.display = 'inline'; document.getElementById('2401.06272v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2401.06272v1-abstract-full" style="display: none;"> Purpose: Lymph nodes (LNs) in the chest have a tendency to enlarge due to various pathologies, such as lung cancer or pneumonia. Clinicians routinely measure nodal size to monitor disease progression, confirm metastatic cancer, and assess treatment response. However, variations in their shapes and appearances make it cumbersome to identify LNs, which reside outside of most organs. 
Methods: We propose to segment LNs in the mediastinum by leveraging the anatomical priors of 28 different structures (e.g., lung, trachea etc.) generated by the public TotalSegmentator tool. The CT volumes from 89 patients available in the public NIH CT Lymph Node dataset were used to train three 3D nnUNet models to segment LNs. The public St. Olavs dataset containing 15 patients (out-of-training-distribution) was used to evaluate the segmentation performance. Results: For the 15 test patients, the 3D cascade nnUNet model obtained the highest Dice score of 72.2 +- 22.3 for mediastinal LNs with short axis diameter $\geq$ 8mm and 54.8 +- 23.8 for all LNs respectively. These results represent an improvement of 10 points over a current approach that was evaluated on the same test dataset. Conclusion: To our knowledge, we are the first to harness 28 distinct anatomical priors to segment mediastinal LNs, and our work can be extended to other nodal zones in the body. The proposed method has immense potential for improved patient outcomes through the identification of enlarged nodes in initial staging CT scans. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2401.06272v1-abstract-full').style.display = 'none'; document.getElementById('2401.06272v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 11 January, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> January 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Submitted to CARS 2024</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2401.05698">arXiv:2401.05698</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2401.05698">pdf</a>, <a href="https://arxiv.org/format/2401.05698">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Human-Computer Interaction">cs.HC</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Multimedia">cs.MM</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Sound">cs.SD</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Audio and Speech Processing">eess.AS</span> </div> <div class="is-inline-block" style="margin-left: 0.5rem"> <div class="tags has-addons"> <span class="tag is-dark is-size-7">doi</span> <span class="tag is-light is-size-7"><a class="" href="https://doi.org/10.1016/j.inffus.2024.102382">10.1016/j.inffus.2024.102382 <i class="fa fa-external-link" aria-hidden="true"></i></a></span> </div> </div> </div> <p class="title is-5 mathjax"> HiCMAE: Hierarchical Contrastive Masked Autoencoder for Self-Supervised Audio-Visual Emotion Recognition </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&amp;query=Sun%2C+L">Licai Sun</a>, <a href="/search/eess?searchtype=author&amp;query=Lian%2C+Z">Zheng Lian</a>, <a href="/search/eess?searchtype=author&amp;query=Liu%2C+B">Bin Liu</a>, <a href="/search/eess?searchtype=author&amp;query=Tao%2C+J">Jianhua Tao</a> </p> <p class="abstract mathjax"> <span 
class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2401.05698v2-abstract-short" style="display: inline;"> Audio-Visual Emotion Recognition (AVER) has garnered increasing attention in recent years for its critical role in creating emotion-ware intelligent machines. Previous efforts in this area are dominated by the supervised learning paradigm. Despite significant progress, supervised learning is meeting its bottleneck due to the longstanding data scarcity issue in AVER. Motivated by recent advances in&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2401.05698v2-abstract-full').style.display = 'inline'; document.getElementById('2401.05698v2-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2401.05698v2-abstract-full" style="display: none;"> Audio-Visual Emotion Recognition (AVER) has garnered increasing attention in recent years for its critical role in creating emotion-ware intelligent machines. Previous efforts in this area are dominated by the supervised learning paradigm. Despite significant progress, supervised learning is meeting its bottleneck due to the longstanding data scarcity issue in AVER. Motivated by recent advances in self-supervised learning, we propose Hierarchical Contrastive Masked Autoencoder (HiCMAE), a novel self-supervised framework that leverages large-scale self-supervised pre-training on vast unlabeled audio-visual data to promote the advancement of AVER. Following prior arts in self-supervised audio-visual representation learning, HiCMAE adopts two primary forms of self-supervision for pre-training, namely masked data modeling and contrastive learning. Unlike them which focus exclusively on top-layer representations while neglecting explicit guidance of intermediate layers, HiCMAE develops a three-pronged strategy to foster hierarchical audio-visual feature learning and improve the overall quality of learned representations. To verify the effectiveness of HiCMAE, we conduct extensive experiments on 9 datasets covering both categorical and dimensional AVER tasks. Experimental results show that our method significantly outperforms state-of-the-art supervised and self-supervised audio-visual methods, which indicates that HiCMAE is a powerful audio-visual emotion representation learner. Codes and models will be publicly available at https://github.com/sunlicai/HiCMAE. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2401.05698v2-abstract-full').style.display = 'none'; document.getElementById('2401.05698v2-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 1 April, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 11 January, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> January 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Accepted by Information Fusion. 
The code is available at https://github.com/sunlicai/HiCMAE</span> </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Journal ref:</span> Information Fusion, 2024 </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2312.15628">arXiv:2312.15628</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2312.15628">pdf</a>, <a href="https://arxiv.org/format/2312.15628">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Sound">cs.SD</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Audio and Speech Processing">eess.AS</span> </div> </div> <p class="title is-5 mathjax"> Balanced SNR-Aware Distillation for Guided Text-to-Audio Generation </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&amp;query=Liu%2C+B">Bingzhi Liu</a>, <a href="/search/eess?searchtype=author&amp;query=Cao%2C+Y">Yin Cao</a>, <a href="/search/eess?searchtype=author&amp;query=Liu%2C+H">Haohe Liu</a>, <a href="/search/eess?searchtype=author&amp;query=Zhou%2C+Y">Yi Zhou</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2312.15628v1-abstract-short" style="display: inline;"> Diffusion models have demonstrated promising results in text-to-audio generation tasks. However, their practical usability is hindered by slow sampling speeds, limiting their applicability in high-throughput scenarios. To address this challenge, progressive distillation methods have been effective in producing more compact and efficient models. Nevertheless, these methods encounter issues with unb&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2312.15628v1-abstract-full').style.display = 'inline'; document.getElementById('2312.15628v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2312.15628v1-abstract-full" style="display: none;"> Diffusion models have demonstrated promising results in text-to-audio generation tasks. However, their practical usability is hindered by slow sampling speeds, limiting their applicability in high-throughput scenarios. To address this challenge, progressive distillation methods have been effective in producing more compact and efficient models. Nevertheless, these methods encounter issues with unbalanced weights at both high and low noise levels, potentially impacting the quality of generated samples. In this paper, we propose the adaptation of the progressive distillation method to text-to-audio generation tasks and introduce the Balanced SNR-Aware~(BSA) method, an enhanced loss-weighting mechanism for diffusion distillation. The BSA method employs a balanced approach to weight the loss for both high and low noise levels. We evaluate our proposed method on the AudioCaps dataset and report experimental results showing superior performance during the reverse diffusion process compared to previous distillation methods with the same number of sampling steps. Furthermore, the BSA method allows for a significant reduction in sampling steps from 200 to 25, with minimal performance degradation when compared to the original teacher models. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2312.15628v1-abstract-full').style.display = 'none'; document.getElementById('2312.15628v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 25 December, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> December 2023. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">5 pages</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2312.02199">arXiv:2312.02199</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2312.02199">pdf</a>, <a href="https://arxiv.org/format/2312.02199">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Image and Video Processing">eess.IV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Applications">stat.AP</span> </div> </div> <p class="title is-5 mathjax"> USat: A Unified Self-Supervised Encoder for Multi-Sensor Satellite Imagery </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&amp;query=Irvin%2C+J">Jeremy Irvin</a>, <a href="/search/eess?searchtype=author&amp;query=Tao%2C+L">Lucas Tao</a>, <a href="/search/eess?searchtype=author&amp;query=Zhou%2C+J">Joanne Zhou</a>, <a href="/search/eess?searchtype=author&amp;query=Ma%2C+Y">Yuntao Ma</a>, <a href="/search/eess?searchtype=author&amp;query=Nashold%2C+L">Langston Nashold</a>, <a href="/search/eess?searchtype=author&amp;query=Liu%2C+B">Benjamin Liu</a>, <a href="/search/eess?searchtype=author&amp;query=Ng%2C+A+Y">Andrew Y. Ng</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2312.02199v1-abstract-short" style="display: inline;"> Large, self-supervised vision models have led to substantial advancements for automatically interpreting natural images. Recent works have begun tailoring these methods to remote sensing data which has rich structure with multi-sensor, multi-spectral, and temporal information providing massive amounts of self-labeled data that can be used for self-supervised pre-training. In this work, we develop&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2312.02199v1-abstract-full').style.display = 'inline'; document.getElementById('2312.02199v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2312.02199v1-abstract-full" style="display: none;"> Large, self-supervised vision models have led to substantial advancements for automatically interpreting natural images. 
Recent works have begun tailoring these methods to remote sensing data which has rich structure with multi-sensor, multi-spectral, and temporal information providing massive amounts of self-labeled data that can be used for self-supervised pre-training. In this work, we develop a new encoder architecture called USat that can input multi-spectral data from multiple sensors for self-supervised pre-training. USat is a vision transformer with modified patch projection layers and positional encodings to model spectral bands with varying spatial scales from multiple sensors. We integrate USat into a Masked Autoencoder (MAE) self-supervised pre-training procedure and find that a pre-trained USat outperforms state-of-the-art self-supervised MAE models trained on remote sensing data on multiple remote sensing benchmark datasets (up to 8%) and leads to improvements in low data regimes (up to 7%). Code and pre-trained weights are available at https://github.com/stanfordmlgroup/USat . <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2312.02199v1-abstract-full').style.display = 'none'; document.getElementById('2312.02199v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 2 December, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> December 2023. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2311.00996">arXiv:2311.00996</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2311.00996">pdf</a>, <a href="https://arxiv.org/format/2311.00996">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Image and Video Processing">eess.IV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> VCISR: Blind Single Image Super-Resolution with Video Compression Synthetic Data </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&amp;query=Wang%2C+B">Boyang Wang</a>, <a href="/search/eess?searchtype=author&amp;query=Liu%2C+B">Bowen Liu</a>, <a href="/search/eess?searchtype=author&amp;query=Liu%2C+S">Shiyu Liu</a>, <a href="/search/eess?searchtype=author&amp;query=Yang%2C+F">Fengyu Yang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2311.00996v2-abstract-short" style="display: inline;"> In the blind single image super-resolution (SISR) task, existing works have been successful in restoring image-level unknown degradations. However, when a single video frame becomes the input, these works usually fail to address degradations caused by video compression, such as mosquito noise, ringing, blockiness, and staircase noise. 
In this work, we for the first time, present a video compressio&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2311.00996v2-abstract-full').style.display = 'inline'; document.getElementById('2311.00996v2-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2311.00996v2-abstract-full" style="display: none;"> In the blind single image super-resolution (SISR) task, existing works have been successful in restoring image-level unknown degradations. However, when a single video frame becomes the input, these works usually fail to address degradations caused by video compression, such as mosquito noise, ringing, blockiness, and staircase noise. In this work, we for the first time, present a video compression-based degradation model to synthesize low-resolution image data in the blind SISR task. Our proposed image synthesizing method is widely applicable to existing image datasets, so that a single degraded image can contain distortions caused by the lossy video compression algorithms. This overcomes the leak of feature diversity in video data and thus retains the training efficiency. By introducing video coding artifacts to SISR degradation models, neural networks can super-resolve images with the ability to restore video compression degradations, and achieve better results on restoring generic distortions caused by image compression as well. Our proposed approach achieves superior performance in SOTA no-reference Image Quality Assessment, and shows better visual quality on various datasets. In addition, we evaluate the SISR neural network trained with our degradation model on video super-resolution (VSR) datasets. Compared to architectures specifically designed for the VSR purpose, our method exhibits similar or better performance, evidencing that the presented strategy on infusing video-based degradation is generalizable to address more complicated compression artifacts even without temporal cues. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2311.00996v2-abstract-full').style.display = 'none'; document.getElementById('2311.00996v2-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 22 November, 2023; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 2 November, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> November 2023. 
</p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2310.08080">arXiv:2310.08080</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2310.08080">pdf</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Image and Video Processing">eess.IV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> RT-SRTS: Angle-Agnostic Real-Time Simultaneous 3D Reconstruction and Tumor Segmentation from Single X-Ray Projection </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&amp;query=Zhu%2C+M">Miao Zhu</a>, <a href="/search/eess?searchtype=author&amp;query=Fu%2C+Q">Qiming Fu</a>, <a href="/search/eess?searchtype=author&amp;query=Liu%2C+B">Bo Liu</a>, <a href="/search/eess?searchtype=author&amp;query=Zhang%2C+M">Mengxi Zhang</a>, <a href="/search/eess?searchtype=author&amp;query=Li%2C+B">Bojian Li</a>, <a href="/search/eess?searchtype=author&amp;query=Luo%2C+X">Xiaoyan Luo</a>, <a href="/search/eess?searchtype=author&amp;query=Zhou%2C+F">Fugen Zhou</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2310.08080v2-abstract-short" style="display: inline;"> Radiotherapy is one of the primary treatment methods for tumors, but the organ movement caused by respiration limits its accuracy. Recently, 3D imaging from a single X-ray projection has received extensive attention as a promising approach to address this issue. However, current methods can only reconstruct 3D images without directly locating the tumor and are only validated for fixed-angle imagin&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2310.08080v2-abstract-full').style.display = 'inline'; document.getElementById('2310.08080v2-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2310.08080v2-abstract-full" style="display: none;"> Radiotherapy is one of the primary treatment methods for tumors, but the organ movement caused by respiration limits its accuracy. Recently, 3D imaging from a single X-ray projection has received extensive attention as a promising approach to address this issue. However, current methods can only reconstruct 3D images without directly locating the tumor and are only validated for fixed-angle imaging, which fails to fully meet the requirements of motion control in radiotherapy. In this study, a novel imaging method RT-SRTS is proposed which integrates 3D imaging and tumor segmentation into one network based on multi-task learning (MTL) and achieves real-time simultaneous 3D reconstruction and tumor segmentation from a single X-ray projection at any angle. Furthermore, the attention enhanced calibrator (AEC) and uncertain-region elaboration (URE) modules have been proposed to aid feature extraction and improve segmentation accuracy. The proposed method was evaluated on fifteen patient cases and compared with three state-of-the-art methods. It not only delivers superior 3D reconstruction but also demonstrates commendable tumor segmentation results. 
Simultaneous reconstruction and segmentation can be completed in approximately 70 ms, significantly faster than the required time threshold for real-time tumor tracking. The efficacies of both AEC and URE have also been validated in ablation studies. The code of work is available at https://github.com/ZywooSimple/RT-SRTS. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2310.08080v2-abstract-full').style.display = 'none'; document.getElementById('2310.08080v2-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 28 March, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 12 October, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2023. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2309.07198">arXiv:2309.07198</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2309.07198">pdf</a>, <a href="https://arxiv.org/format/2309.07198">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Image and Video Processing">eess.IV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Applied Physics">physics.app-ph</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Optics">physics.optics</span> </div> <div class="is-inline-block" style="margin-left: 0.5rem"> <div class="tags has-addons"> <span class="tag is-dark is-size-7">doi</span> <span class="tag is-light is-size-7"><a class="" href="https://doi.org/10.1364/OL.515429">10.1364/OL.515429 <i class="fa fa-external-link" aria-hidden="true"></i></a></span> </div> </div> </div> <p class="title is-5 mathjax"> Temporal compressive edge imaging enabled by a lensless diffuser camera </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&amp;query=Zheng%2C+Z">Ze Zheng</a>, <a href="/search/eess?searchtype=author&amp;query=Liu%2C+B">Baolei Liu</a>, <a href="/search/eess?searchtype=author&amp;query=Song%2C+J">Jiaqi Song</a>, <a href="/search/eess?searchtype=author&amp;query=Ding%2C+L">Lei Ding</a>, <a href="/search/eess?searchtype=author&amp;query=Zhong%2C+X">Xiaolan Zhong</a>, <a href="/search/eess?searchtype=author&amp;query=Mcgloin%2C+D">David Mcgloin</a>, <a href="/search/eess?searchtype=author&amp;query=Wang%2C+F">Fan Wang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2309.07198v1-abstract-short" style="display: inline;"> Lensless imagers based on diffusers or encoding masks enable high-dimensional imaging from a single shot measurement and have been applied in various applications. However, to further extract image information such as edge detection, conventional post-processing filtering operations are needed after the reconstruction of the original object images in the diffuser imaging systems. 
Here, we present&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2309.07198v1-abstract-full').style.display = 'inline'; document.getElementById('2309.07198v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2309.07198v1-abstract-full" style="display: none;"> Lensless imagers based on diffusers or encoding masks enable high-dimensional imaging from a single shot measurement and have been applied in various applications. However, to further extract image information such as edge detection, conventional post-processing filtering operations are needed after the reconstruction of the original object images in the diffuser imaging systems. Here, we present the concept of a temporal compressive edge detection method based on a lensless diffuser camera, which can directly recover a time sequence of edge images of a moving object from a single-shot measurement, without further post-processing steps. Our approach provides higher image quality during edge detection, compared with the conventional post-processing method. We demonstrate the effectiveness of this approach by both numerical simulation and experiments. The proof-of-concept approach can be further developed with other image post-process operations or versatile computer vision assignments toward task-oriented intelligent lensless imaging systems. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2309.07198v1-abstract-full').style.display = 'none'; document.getElementById('2309.07198v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 13 September, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> September 2023. 
</p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">5 pages, 4 figures</span> </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Journal ref:</span> Optics Letters, 49(11), 3058-3061 (2024) </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2308.13234">arXiv:2308.13234</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2308.13234">pdf</a>, <a href="https://arxiv.org/format/2308.13234">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Human-Computer Interaction">cs.HC</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Signal Processing">eess.SP</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Neurons and Cognition">q-bio.NC</span> </div> </div> <p class="title is-5 mathjax"> Decoding Natural Images from EEG for Object Recognition </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&amp;query=Song%2C+Y">Yonghao Song</a>, <a href="/search/eess?searchtype=author&amp;query=Liu%2C+B">Bingchuan Liu</a>, <a href="/search/eess?searchtype=author&amp;query=Li%2C+X">Xiang Li</a>, <a href="/search/eess?searchtype=author&amp;query=Shi%2C+N">Nanlin Shi</a>, <a href="/search/eess?searchtype=author&amp;query=Wang%2C+Y">Yijun Wang</a>, <a href="/search/eess?searchtype=author&amp;query=Gao%2C+X">Xiaorong Gao</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2308.13234v3-abstract-short" style="display: inline;"> Electroencephalography (EEG) signals, known for convenient non-invasive acquisition but low signal-to-noise ratio, have recently gained substantial attention due to the potential to decode natural images. This paper presents a self-supervised framework to demonstrate the feasibility of learning image representations from EEG signals, particularly for object recognition. The framework utilizes imag&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2308.13234v3-abstract-full').style.display = 'inline'; document.getElementById('2308.13234v3-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2308.13234v3-abstract-full" style="display: none;"> Electroencephalography (EEG) signals, known for convenient non-invasive acquisition but low signal-to-noise ratio, have recently gained substantial attention due to the potential to decode natural images. This paper presents a self-supervised framework to demonstrate the feasibility of learning image representations from EEG signals, particularly for object recognition. The framework utilizes image and EEG encoders to extract features from paired image stimuli and EEG responses. Contrastive learning aligns these two modalities by constraining their similarity. With the framework, we attain significantly above-chance results on a comprehensive EEG-image dataset, achieving a top-1 accuracy of 15.6% and a top-5 accuracy of 42.8% in challenging 200-way zero-shot tasks. 
Moreover, we perform extensive experiments to explore the biological plausibility by resolving the temporal, spatial, spectral, and semantic aspects of EEG signals. Besides, we introduce attention modules to capture spatial correlations, providing implicit evidence of the brain activity perceived from EEG data. These findings yield valuable insights for neural decoding and brain-computer interfaces in real-world scenarios. The code will be released on https://github.com/eeyhsong/NICE-EEG. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2308.13234v3-abstract-full').style.display = 'none'; document.getElementById('2308.13234v3-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 4 April, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 25 August, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> August 2023. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">ICLR, 2024</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2308.09223">arXiv:2308.09223</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2308.09223">pdf</a>, <a href="https://arxiv.org/format/2308.09223">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Image and Video Processing">eess.IV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> DMCVR: Morphology-Guided Diffusion Model for 3D Cardiac Volume Reconstruction </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&amp;query=He%2C+X">Xiaoxiao He</a>, <a href="/search/eess?searchtype=author&amp;query=Tan%2C+C">Chaowei Tan</a>, <a href="/search/eess?searchtype=author&amp;query=Han%2C+L">Ligong Han</a>, <a href="/search/eess?searchtype=author&amp;query=Liu%2C+B">Bo Liu</a>, <a href="/search/eess?searchtype=author&amp;query=Axel%2C+L">Leon Axel</a>, <a href="/search/eess?searchtype=author&amp;query=Li%2C+K">Kang Li</a>, <a href="/search/eess?searchtype=author&amp;query=Metaxas%2C+D+N">Dimitris N. Metaxas</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2308.09223v1-abstract-short" style="display: inline;"> Accurate 3D cardiac reconstruction from cine magnetic resonance imaging (cMRI) is crucial for improved cardiovascular disease diagnosis and understanding of the heart&#39;s motion. However, current cardiac MRI-based reconstruction technology used in clinical settings is 2D with limited through-plane resolution, resulting in low-quality reconstructed cardiac volumes. 
To better reconstruct 3D cardiac vo&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2308.09223v1-abstract-full').style.display = 'inline'; document.getElementById('2308.09223v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2308.09223v1-abstract-full" style="display: none;"> Accurate 3D cardiac reconstruction from cine magnetic resonance imaging (cMRI) is crucial for improved cardiovascular disease diagnosis and understanding of the heart&#39;s motion. However, current cardiac MRI-based reconstruction technology used in clinical settings is 2D with limited through-plane resolution, resulting in low-quality reconstructed cardiac volumes. To better reconstruct 3D cardiac volumes from sparse 2D image stacks, we propose a morphology-guided diffusion model for 3D cardiac volume reconstruction, DMCVR, that synthesizes high-resolution 2D images and corresponding 3D reconstructed volumes. Our method outperforms previous approaches by conditioning the cardiac morphology on the generative model, eliminating the time-consuming iterative optimization process of the latent code, and improving generation quality. The learned latent spaces provide global semantics, local cardiac morphology and details of each 2D cMRI slice with highly interpretable value to reconstruct 3D cardiac shape. Our experiments show that DMCVR is highly effective in several aspects, such as 2D generation and 3D reconstruction performance. With DMCVR, we can produce high-resolution 3D cardiac MRI reconstructions, surpassing current techniques. Our proposed framework has great potential for improving the accuracy of cardiac disease diagnosis and treatment planning. Code can be accessed at https://github.com/hexiaoxiao-cs/DMCVR. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2308.09223v1-abstract-full').style.display = 'none'; document.getElementById('2308.09223v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 17 August, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> August 2023. 
</p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Accepted in MICCAI 2023</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2306.14125">arXiv:2306.14125</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2306.14125">pdf</a>, <a href="https://arxiv.org/format/2306.14125">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Signal Processing">eess.SP</span> </div> </div> <p class="title is-5 mathjax"> M$^3$SC: A Generic Dataset for Mixed Multi-Modal (MMM) Sensing and Communication Integration </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&amp;query=Cheng%2C+X">Xiang Cheng</a>, <a href="/search/eess?searchtype=author&amp;query=Huang%2C+Z">Ziwei Huang</a>, <a href="/search/eess?searchtype=author&amp;query=Bai%2C+L">Lu Bai</a>, <a href="/search/eess?searchtype=author&amp;query=Zhang%2C+H">Haotian Zhang</a>, <a href="/search/eess?searchtype=author&amp;query=Sun%2C+M">Mingran Sun</a>, <a href="/search/eess?searchtype=author&amp;query=Liu%2C+B">Boxun Liu</a>, <a href="/search/eess?searchtype=author&amp;query=Li%2C+S">Sijiang Li</a>, <a href="/search/eess?searchtype=author&amp;query=Zhang%2C+J">Jianan Zhang</a>, <a href="/search/eess?searchtype=author&amp;query=Lee%2C+M">Minson Lee</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2306.14125v1-abstract-short" style="display: inline;"> The sixth generation (6G) of mobile communication system is witnessing a new paradigm shift, i.e., integrated sensing-communication system. A comprehensive dataset is a prerequisite for 6G integrated sensing-communication research. This paper develops a novel simulation dataset, named M3SC, for mixed multi-modal (MMM) sensing-communication integration, and the generation framework of the M3SC data&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2306.14125v1-abstract-full').style.display = 'inline'; document.getElementById('2306.14125v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2306.14125v1-abstract-full" style="display: none;"> The sixth generation (6G) of mobile communication system is witnessing a new paradigm shift, i.e., integrated sensing-communication system. A comprehensive dataset is a prerequisite for 6G integrated sensing-communication research. This paper develops a novel simulation dataset, named M3SC, for mixed multi-modal (MMM) sensing-communication integration, and the generation framework of the M3SC dataset is further given. To obtain multi-modal sensory data in physical space and communication data in electromagnetic space, we utilize AirSim and WaveFarer to collect multi-modal sensory data and exploit Wireless InSite to collect communication data. Furthermore, the in-depth integration and precise alignment of AirSim, WaveFarer, and Wireless InSite are achieved. The M3SC dataset covers various weather conditions, various frequency bands, and different times of the day. 
Currently, the M3SC dataset contains 1500 snapshots, including 80 RGB images, 160 depth maps, 80 LiDAR point clouds, 256 sets of mmWave waveforms with 8 radar point clouds, and 72 channel impulse response (CIR) matrices per snapshot, thus totaling 120,000 RGB images, 240,000 depth maps, 120,000 LiDAR point clouds, 384,000 sets of mmWave waveforms with 12,000 radar point clouds, and 108,000 CIR matrices. The data processing result presents the multi-modal sensory information and communication channel statistical properties. Finally, the MMM sensing-communication application, which can be supported by the M3SC dataset, is discussed. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2306.14125v1-abstract-full').style.display = 'none'; document.getElementById('2306.14125v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 25 June, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> June 2023. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">12 pages, 12 figures</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2306.07505">arXiv:2306.07505</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2306.07505">pdf</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Tissues and Organs">q-bio.TO</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Image and Video Processing">eess.IV</span> </div> </div> <p class="title is-5 mathjax"> Deep learning radiomics for assessment of gastroesophageal varices in people with compensated advanced chronic liver disease </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&amp;query=Wang%2C+L">Lan Wang</a>, <a href="/search/eess?searchtype=author&amp;query=He%2C+R">Ruiling He</a>, <a href="/search/eess?searchtype=author&amp;query=Zhao%2C+L">Lili Zhao</a>, <a href="/search/eess?searchtype=author&amp;query=Wang%2C+J">Jia Wang</a>, <a href="/search/eess?searchtype=author&amp;query=Geng%2C+Z">Zhengzi Geng</a>, <a href="/search/eess?searchtype=author&amp;query=Ren%2C+T">Tao Ren</a>, <a href="/search/eess?searchtype=author&amp;query=Zhang%2C+G">Guo Zhang</a>, <a href="/search/eess?searchtype=author&amp;query=Zhang%2C+P">Peng Zhang</a>, <a href="/search/eess?searchtype=author&amp;query=Tang%2C+K">Kaiqiang Tang</a>, <a href="/search/eess?searchtype=author&amp;query=Gao%2C+C">Chaofei Gao</a>, <a href="/search/eess?searchtype=author&amp;query=Chen%2C+F">Fei Chen</a>, <a href="/search/eess?searchtype=author&amp;query=Zhang%2C+L">Liting Zhang</a>, <a href="/search/eess?searchtype=author&amp;query=Zhou%2C+Y">Yonghe Zhou</a>, <a href="/search/eess?searchtype=author&amp;query=Li%2C+X">Xin Li</a>, <a href="/search/eess?searchtype=author&amp;query=He%2C+F">Fanbin He</a>, <a href="/search/eess?searchtype=author&amp;query=Huan%2C+H">Hui Huan</a>, <a href="/search/eess?searchtype=author&amp;query=Wang%2C+W">Wenjuan Wang</a>, <a href="/search/eess?searchtype=author&amp;query=Liang%2C+Y">Yunxiao Liang</a>, <a href="/search/eess?searchtype=author&amp;query=Tang%2C+J">Juan Tang</a>, <a href="/search/eess?searchtype=author&amp;query=Ai%2C+F">Fang Ai</a>, <a 
href="/search/eess?searchtype=author&amp;query=Wang%2C+T">Tingyu Wang</a>, <a href="/search/eess?searchtype=author&amp;query=Zheng%2C+L">Liyun Zheng</a>, <a href="/search/eess?searchtype=author&amp;query=Zhao%2C+Z">Zhongwei Zhao</a>, <a href="/search/eess?searchtype=author&amp;query=Ji%2C+J">Jiansong Ji</a>, <a href="/search/eess?searchtype=author&amp;query=Liu%2C+W">Wei Liu</a> , et al. (22 additional authors not shown) </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2306.07505v1-abstract-short" style="display: inline;"> Objective: Bleeding from gastroesophageal varices (GEV) is a medical emergency associated with high mortality. We aim to construct an artificial intelligence-based model of two-dimensional shear wave elastography (2D-SWE) of the liver and spleen to precisely assess the risk of GEV and high-risk gastroesophageal varices (HRV). Design: A prospective multicenter study was conducted in patients with&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2306.07505v1-abstract-full').style.display = 'inline'; document.getElementById('2306.07505v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2306.07505v1-abstract-full" style="display: none;"> Objective: Bleeding from gastroesophageal varices (GEV) is a medical emergency associated with high mortality. We aim to construct an artificial intelligence-based model of two-dimensional shear wave elastography (2D-SWE) of the liver and spleen to precisely assess the risk of GEV and high-risk gastroesophageal varices (HRV). Design: A prospective multicenter study was conducted in patients with compensated advanced chronic liver disease. 305 patients were enrolled from 12 hospitals, and finally 265 patients were included, with 1136 liver stiffness measurement (LSM) images and 1042 spleen stiffness measurement (SSM) images generated by 2D-SWE. We leveraged deep learning methods to uncover associations between image features and patient risk, and thus conducted models to predict GEV and HRV. Results: A multi-modality Deep Learning Risk Prediction model (DLRP) was constructed to assess GEV and HRV, based on LSM and SSM images, and clinical information. Validation analysis revealed that the AUCs of DLRP were 0.91 for GEV (95% CI 0.90 to 0.93, p &lt; 0.05) and 0.88 for HRV (95% CI 0.86 to 0.89, p &lt; 0.01), which were significantly and robustly better than canonical risk indicators, including the value of LSM and SSM. Moreover, DLPR was better than the model using individual parameters, including LSM and SSM images. In HRV prediction, the 2D-SWE images of SSM outperform LSM (p &lt; 0.01). Conclusion: DLRP shows excellent performance in predicting GEV and HRV over canonical risk indicators LSM and SSM. Additionally, the 2D-SWE images of SSM provided more information for better accuracy in predicting HRV than the LSM. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2306.07505v1-abstract-full').style.display = 'none'; document.getElementById('2306.07505v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 12 June, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> June 2023. 
</p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2305.18205">arXiv:2305.18205</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2305.18205">pdf</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Signal Processing">eess.SP</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Nuclear Experiment">nucl-ex</span> </div> <div class="is-inline-block" style="margin-left: 0.5rem"> <div class="tags has-addons"> <span class="tag is-dark is-size-7">doi</span> <span class="tag is-light is-size-7"><a class="" href="https://doi.org/10.1109/TNS.2024.3444888">10.1109/TNS.2024.3444888 <i class="fa fa-external-link" aria-hidden="true"></i></a></span> </div> </div> </div> <p class="title is-5 mathjax"> Pulse shape discrimination based on the Tempotron: a powerful classifier on GPU </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&amp;query=Liu%2C+H">Haoran Liu</a>, <a href="/search/eess?searchtype=author&amp;query=Li%2C+P">Peng Li</a>, <a href="/search/eess?searchtype=author&amp;query=Liu%2C+M">Ming-Zhe Liu</a>, <a href="/search/eess?searchtype=author&amp;query=Wang%2C+K">Kai-Ming Wang</a>, <a href="/search/eess?searchtype=author&amp;query=Zuo%2C+Z">Zhuo Zuo</a>, <a href="/search/eess?searchtype=author&amp;query=Liu%2C+B">Bing-Qi Liu</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2305.18205v2-abstract-short" style="display: inline;"> This study utilized the Tempotron, a robust classifier based on a third-generation neural network model, for pulse shape discrimination. By eliminating the need for manual feature extraction, the Tempotron model can process pulse signals directly, generating discrimination results based on prior knowledge. The study performed experiments using GPU acceleration, resulting in over 500 times faster c&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2305.18205v2-abstract-full').style.display = 'inline'; document.getElementById('2305.18205v2-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2305.18205v2-abstract-full" style="display: none;"> This study utilized the Tempotron, a robust classifier based on a third-generation neural network model, for pulse shape discrimination. By eliminating the need for manual feature extraction, the Tempotron model can process pulse signals directly, generating discrimination results based on prior knowledge. The study performed experiments using GPU acceleration, resulting in over 500 times faster compared to the CPU-based model, and investigated the impact of noise augmentation on the Tempotron performance. Experimental results substantiated that Tempotron serves as a formidable classifier, adept at accomplishing high discrimination accuracy on both AmBe and time-of-flight PuBe datasets. Furthermore, analyzing the neural activity of Tempotron during training shed light on its learning characteristics and aided in selecting its hyperparameters. 
Moreover, the study addressed the constraints and potential avenues for future development in utilizing the Tempotron for pulse shape discrimination. The dataset used in this study and the GPU-based Tempotron are publicly available on GitHub at https://github.com/HaoranLiu507/TempotronGPU. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2305.18205v2-abstract-full').style.display = 'none'; document.getElementById('2305.18205v2-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 22 August, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 26 May, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> May 2023. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">12 pages, 9 figures</span> </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Journal ref:</span> IEEE Transactions on Nuclear Science (2024) </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2305.10788">arXiv:2305.10788</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2305.10788">pdf</a>, <a href="https://arxiv.org/format/2305.10788">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Sound">cs.SD</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Audio and Speech Processing">eess.AS</span> </div> </div> <p class="title is-5 mathjax"> DQ-Whisper: Joint Distillation and Quantization for Efficient Multilingual Speech Recognition </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&amp;query=Shao%2C+H">Hang Shao</a>, <a href="/search/eess?searchtype=author&amp;query=Liu%2C+B">Bei Liu</a>, <a href="/search/eess?searchtype=author&amp;query=Wang%2C+W">Wei Wang</a>, <a href="/search/eess?searchtype=author&amp;query=Gong%2C+X">Xun Gong</a>, <a href="/search/eess?searchtype=author&amp;query=Qian%2C+Y">Yanmin Qian</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2305.10788v2-abstract-short" style="display: inline;"> As a popular multilingual and multitask pre-trained speech model, Whisper has the problem of curse of multilinguality. To enhance multilingual capabilities in small Whisper models, we propose DQ-Whisper, a novel joint distillation and quantization framework to compress Whisper for efficient inference. Firstly, we propose a novel dynamic matching distillation strategy. Then, a quantization-aware di&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2305.10788v2-abstract-full').style.display = 'inline'; document.getElementById('2305.10788v2-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2305.10788v2-abstract-full" style="display: none;"> As a popular multilingual and multitask pre-trained speech model, Whisper has the problem of curse of multilinguality. 
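The authors' GPU implementation lives in the repository linked above; purely for background, here is a minimal NumPy sketch of the classic Tempotron decision rule (a weighted sum of postsynaptic-potential kernels compared against a firing threshold). The time constants, threshold, and spike encoding of the pulse waveform are assumed values, not those of the paper.

import numpy as np

TAU_M, TAU_S, V_THR = 15.0, 3.75, 1.0   # assumed membrane/synaptic constants (ms) and threshold

# peak of the raw double-exponential kernel, used to normalize its maximum to 1
_grid = np.linspace(0.0, 100.0, 10001)
_K_PEAK = np.max(np.exp(-_grid / TAU_M) - np.exp(-_grid / TAU_S))

def psp_kernel(t):
    """Normalized postsynaptic-potential kernel K(t); zero for t < 0."""
    t = np.asarray(t, dtype=float)
    return np.where(t >= 0, (np.exp(-t / TAU_M) - np.exp(-t / TAU_S)) / _K_PEAK, 0.0)

def membrane_potential(spike_times, weights, t_grid):
    """V(t) = sum_i w_i * sum_{t_i} K(t - t_i) for one input pattern."""
    v = np.zeros_like(t_grid)
    for w, times in zip(weights, spike_times):
        for ti in times:
            v += w * psp_kernel(t_grid - ti)
    return v

def classify(spike_times, weights, t_grid):
    """Tempotron decision: output 1 if the potential ever crosses the threshold.
    (Training nudges each weight by +/- lr * K(t_max - t_i) on errors; omitted here.)"""
    return int(membrane_potential(spike_times, weights, t_grid).max() >= V_THR)

# toy pattern: three afferents with assumed spike times (ms) and random weights
rng = np.random.default_rng(0)
pattern = [[10.0, 40.0], [12.0], [55.0, 60.0]]
weights = rng.normal(0.0, 0.3, size=3)
print(classify(pattern, weights, np.linspace(0.0, 100.0, 1001)))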
arXiv:2305.10788 [pdf, other] cs.SD cs.CL eess.AS (https://arxiv.org/abs/2305.10788)
DQ-Whisper: Joint Distillation and Quantization for Efficient Multilingual Speech Recognition
Authors: Hang Shao, Bei Liu, Wei Wang, Xun Gong, Yanmin Qian
Abstract: As a popular multilingual and multitask pre-trained speech model, Whisper suffers from the curse of multilinguality. To enhance the multilingual capabilities of small Whisper models, we propose DQ-Whisper, a novel joint distillation and quantization framework to compress Whisper for efficient inference. First, we propose a novel dynamic matching distillation strategy. Then, a quantization-aware distillation framework is introduced to integrate quantization with distillation. Experimental results on various multilingual datasets show that our distillation approach can effectively enhance the multilingual capabilities of small Whisper models without increasing computational costs. Up to a 5.18x reduction in model size is achieved with marginal performance degradation. In addition, quantization is compatible with distillation, which can yield a higher compression rate.
Submitted 29 September, 2024; v1 submitted 18 May, 2023; originally announced May 2023.
Comments: Accepted by SLT2024
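The dynamic matching strategy is specific to DQ-Whisper and is not reproduced here; the snippet below is only a generic sketch of how response-level distillation can be combined with quantization-aware training via a straight-through fake-quantizer. Toy linear layers stand in for Whisper, and the temperature, bit width, and loss weights are assumptions.

import torch
import torch.nn as nn
import torch.nn.functional as F

def fake_quant(w, bits=8):
    """Symmetric per-tensor fake quantization with a straight-through gradient."""
    scale = w.detach().abs().max().clamp(min=1e-8) / (2 ** (bits - 1) - 1)
    q = (w / scale).round().clamp(-(2 ** (bits - 1)), 2 ** (bits - 1) - 1) * scale
    return w + (q - w).detach()      # forward: quantized values; backward: identity

teacher = nn.Linear(40, 100)          # stands in for a large teacher model (assumption)
student = nn.Linear(40, 100)          # small student to be compressed
opt = torch.optim.Adam(student.parameters(), lr=1e-3)
T = 2.0                               # distillation temperature, assumed

x = torch.randn(8, 40)                # toy acoustic features
labels = torch.randint(0, 100, (8,))  # toy token targets

with torch.no_grad():
    t_logits = teacher(x)

# student forward pass through fake-quantized weights (quantization-aware training)
s_logits = F.linear(x, fake_quant(student.weight), student.bias)
kd = F.kl_div(F.log_softmax(s_logits / T, dim=-1),
              F.softmax(t_logits / T, dim=-1), reduction="batchmean") * T * T
ce = F.cross_entropy(s_logits, labels)
loss = 0.5 * kd + 0.5 * ce            # assumed weighting of the two terms
loss.backward()
opt.step()
print(float(loss))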
arXiv:2304.12804 [pdf, other] cs.IT eess.SP (https://arxiv.org/abs/2304.12804)
Channel Estimation and Signal Detection for NLOS Ultraviolet Scattering Communication with Space Division Multiple Access
Authors: Yubo Zhang, Yuchen Pan, Chen Gong, Beiyuan Liu, Zhengyuan Xu
Abstract: We design a receiver that assembles several photomultiplier tubes (PMTs) as an array to increase the field of view (FOV) of the receiver and adapt to multiuser scenarios over non-line-of-sight (NLOS) ultraviolet (UV) channels. Channel estimation and signal detection are investigated according to the space-division characteristics of the structure. First, we adopt a balanced structure for the pilot matrix, analyze the channel estimation mean square error (MSE), and optimize the structure parameters. Then, with the estimated parameters, an analytical threshold detection rule is proposed as a preliminary step toward multiuser detection. The detection rule can be optimized by analyzing the separability of two users based on a Gaussian approximation of the Poisson weighted sum. To assess the effect of imperfect estimation, a sensitivity analysis of the channel estimation error on two-user signal detection is performed. Moreover, we propose a successive elimination method for on-off keying (OOK) modulated multiuser symbol detection based on the preceding threshold detection rule. A closed-form upper bound on the detection error rate is calculated, which turns out to be a good approximation of that of multiuser maximum-likelihood (ML) detection. The proposed successive elimination method is twenty times faster than ML detection with negligible degradation in detection error rate.
Submitted 25 April, 2023; originally announced April 2023.
arXiv:2304.09607 [pdf, other] cs.SD cs.CL eess.AS (https://arxiv.org/abs/2304.09607)
CB-Conformer: Contextual biasing Conformer for biased word recognition
Authors: Yaoxun Xu, Baiji Liu, Qiaochu Huang, Xingchen Song, Zhiyong Wu, Shiyin Kang, Helen Meng
Abstract: Due to the mismatch between source and target domains, how to better utilize biased-word information to improve the performance of automatic speech recognition models in the target domain has become a hot research topic. Previous approaches either decode with a fixed external language model or introduce a sizeable biasing module, which leads to poor adaptability and slow inference. In this work, we propose CB-Conformer to improve biased word recognition by introducing a Contextual Biasing Module and a Self-Adaptive Language Model into the vanilla Conformer. The Contextual Biasing Module combines audio fragments and contextual information, with only 0.2% of the original Conformer's model parameters. The Self-Adaptive Language Model modifies the internal weights of biased words based on their recall and precision, resulting in a greater focus on biased words and more successful integration with the automatic speech recognition model than a standard fixed language model. In addition, we construct and release an open-source Mandarin biased-word dataset based on WenetSpeech. Experiments indicate that our proposed method brings a 15.34% character error rate reduction, a 14.13% increase in biased word recall, and a 6.80% increase in biased word F1-score compared with the base Conformer.
Submitted 25 April, 2023; v1 submitted 19 April, 2023; originally announced April 2023.
arXiv:2304.02273 [pdf, other] eess.IV cs.CV (https://arxiv.org/abs/2304.02273)
MMVC: Learned Multi-Mode Video Compression with Block-based Prediction Mode Selection and Density-Adaptive Entropy Coding
Authors: Bowen Liu, Yu Chen, Rakesh Chowdary Machineni, Shiyu Liu, Hun-Seok Kim
Abstract: Learning-based video compression has been extensively studied over the past years, but it still has limitations in adapting to various motion patterns and entropy models. In this paper, we propose multi-mode video compression (MMVC), a block-wise mode-ensemble deep video compression framework that selects the optimal mode for feature-domain prediction adapting to different motion patterns. The proposed modes include ConvLSTM-based feature-domain prediction, optical-flow-conditioned feature-domain prediction, and feature propagation, addressing a wide range of cases from static scenes without apparent motion to dynamic scenes with a moving camera. We partition the feature space into blocks for temporal prediction in spatial block-based representations. For entropy coding, we consider both dense and sparse post-quantization residual blocks, and apply optional run-length coding to sparse residuals to improve the compression rate. In this sense, our method uses a dual-mode entropy coding scheme guided by a binary density map, which offers significant rate reduction surpassing the extra cost of transmitting the binary selection map. We validate our scheme on some of the most popular benchmarking datasets. Compared with state-of-the-art video compression schemes and standard codecs, our method yields better or competitive results measured with PSNR and MS-SSIM.
Submitted 5 April, 2023; originally announced April 2023.
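MMVC's entropy coder is learned; the toy sketch below only illustrates the density-adaptive idea described in the abstract: a binary density map chooses, per block, between sending a dense residual as-is and sending a zero-run-length code for a sparse residual. The block size and the sparsity rule are assumptions.

import numpy as np

def rle_encode(block):
    """Encode a flat integer residual block as (zero_run_length, value) pairs."""
    out, run = [], 0
    for v in block:
        if v == 0:
            run += 1
        else:
            out.append((run, int(v)))
            run = 0
    out.append((run, 0))          # trailing-zeros marker
    return out

def encode_blocks(residual, block=4, sparsity=0.75):
    """Split a quantized residual map into blocks and code each one adaptively."""
    h, w = residual.shape
    density_map, payload = [], []
    for i in range(0, h, block):
        for j in range(0, w, block):
            blk = residual[i:i + block, j:j + block].ravel()
            sparse = np.mean(blk == 0) >= sparsity
            density_map.append(int(sparse))          # 1 bit per block, sent to the decoder
            payload.append(rle_encode(blk) if sparse else blk.tolist())
    return density_map, payload

rng = np.random.default_rng(0)
res = rng.integers(-2, 3, size=(8, 8)) * (rng.random((8, 8)) < 0.2)   # mostly-zero residual
density_map, payload = encode_blocks(res)
print(density_map)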
arXiv:2303.14357 [pdf, other] eess.IV cs.CV cs.LG (https://arxiv.org/abs/2303.14357)
Dealing With Heterogeneous 3D MR Knee Images: A Federated Few-Shot Learning Method With Dual Knowledge Distillation
Authors: Xiaoxiao He, Chaowei Tan, Bo Liu, Liping Si, Weiwu Yao, Liang Zhao, Di Liu, Qilong Zhangli, Qi Chang, Kang Li, Dimitris N. Metaxas
Abstract: Federated learning has gained popularity among medical institutions since it enables collaborative training between clients (e.g., hospitals) without aggregating data. However, due to the high cost associated with creating annotations, especially for large 3D image datasets, clinical institutions do not have enough supervised data for training locally. Thus, the performance of the collaborative model is subpar under limited supervision. On the other hand, large institutions have the resources to compile data repositories with high-resolution images and labels. Therefore, individual clients can utilize the knowledge acquired in public data repositories to mitigate the shortage of private annotated images. In this paper, we propose a federated few-shot learning method with dual knowledge distillation. This method allows joint training with limited annotations across clients without jeopardizing privacy. The supervised learning of the proposed method extracts features from limited labeled data in each client, while the unsupervised data is used to distill both feature-based and response-based knowledge from a national data repository to further improve the accuracy of the collaborative model and reduce the communication cost. Extensive evaluations are conducted on 3D magnetic resonance knee images from a private clinical dataset. Our proposed method shows superior performance and less training time than other semi-supervised federated learning methods. Code and additional visualization results are available at https://github.com/hexiaoxiao-cs/fedml-knee.
Submitted 17 April, 2023; v1 submitted 25 March, 2023; originally announced March 2023.
arXiv:2302.13161 [pdf, other] cs.CR cs.LG eess.SY (https://arxiv.org/abs/2302.13161)
Cybersecurity Challenges of Power Transformers
Authors: Hossein Rahimpour, Joe Tusek, Alsharif Abuadbba, Aruna Seneviratne, Toan Phung, Ahmed Musleh, Boyu Liu
Abstract: Cyber threats against critical infrastructure, and their potential for devastating consequences, have risen significantly. The dependency of new power grid technology on information, data analytics, and communication systems makes the entire electricity network vulnerable to cyber threats. Power transformers play a critical role within the power grid and are now commonly enhanced through factory add-ons or intelligent monitoring systems added later to improve the condition monitoring of critical and long-lead-time assets such as transformers. However, the increased connectivity of these power transformers opens the door to more cyber attacks. Therefore, the need to detect and prevent cyber threats is becoming critical. The first step towards that is a deeper understanding of the potential cyber-attack landscape against power transformers. Much of the existing literature pays attention to smart equipment within electricity distribution networks, and most proposed methods are based on model-based detection algorithms. Moreover, only a few of these works address the security vulnerabilities of power elements, especially transformers, within the transmission network. To the best of our knowledge, there is no study in the literature that systematically investigates the cybersecurity challenges against newly emerged smart transformers. This paper addresses this shortcoming by exploring the vulnerabilities and attack vectors of power transformers within electricity networks, the possible attack scenarios, and the risks associated with these attacks.
Submitted 25 March, 2023; v1 submitted 25 February, 2023; originally announced February 2023.
Comments: 11 pages
arXiv:2302.11728 [pdf, other] cs.CV eess.IV (https://arxiv.org/abs/2302.11728) DOI: 10.1109/ICIP49359.2023.10222276
A Convolutional-Transformer Network for Crack Segmentation with Boundary Awareness
Authors: Huaqi Tao, Bingxi Liu, Jinqiang Cui, Hong Zhang
Abstract: Cracks play a crucial role in assessing the safety and durability of manufactured buildings. However, the long and sharp topological features and complex backgrounds of cracks make the task of crack segmentation extremely challenging. In this paper, we propose a novel convolutional-transformer network based on an encoder-decoder architecture to solve this challenge. In particular, we design a Dilated Residual Block (DRB) and a Boundary Awareness Module (BAM). The DRB attends to the local detail of cracks and adjusts the feature dimension for the other blocks as needed, while the BAM learns boundary features from the dilated crack label. Furthermore, the DRB is combined with a lightweight transformer that captures global information to serve as an effective encoder. Experimental results show that the proposed network performs better than state-of-the-art algorithms on two typical datasets. Datasets, code, and trained models are available for research at https://github.com/HqiTao/CT-crackseg.
Submitted 11 November, 2023; v1 submitted 22 February, 2023; originally announced February 2023.
Comments: Accepted to ICIP 2023
arXiv:2302.07269 [pdf, other] eess.IV physics.optics (https://arxiv.org/abs/2302.07269) DOI: 10.1364/OE.486290
Dual-mode adaptive-SVD ghost imaging
Authors: Dajing Wang, Baolei Liu, Jiaqi Song, Yao Wang, Xuchen Shan, Fan Wang
Abstract: In this paper, we present dual-mode adaptive singular value decomposition ghost imaging (A-SVD GI), which can easily be switched between imaging and edge-detection modes. It adaptively localizes the foreground pixels via a threshold selection method, after which only the foreground region is illuminated by singular value decomposition (SVD)-based patterns, consequently retrieving high-quality images at lower sampling ratios. By changing the selection range of foreground pixels, A-SVD GI can be switched to the edge-detection mode to directly reveal the edges of objects, without needing the original image. We investigate the performance of these two modes through both numerical simulations and experiments. We also develop a single-round scheme that halves the number of measurements in experiments, instead of separately illuminating positive and negative patterns as in traditional methods. The binarized SVD patterns, generated by spatial dithering, are modulated by a digital micromirror device (DMD) to speed up data acquisition. This dual-mode A-SVD GI can be applied in various applications, such as remote sensing or target recognition, and could be further extended to multi-modality functional imaging/detection.
Submitted 14 February, 2023; originally announced February 2023.
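For orientation only, this is a bare-bones NumPy sketch of ghost imaging with orthonormal SVD-derived patterns at a sub-Nyquist sampling ratio: bucket measurements are back-projected onto the pattern subspace. The adaptive foreground localization, dithered binarization, and DMD modulation described in the paper are omitted, and the image size and sampling ratio are assumptions.

import numpy as np

rng = np.random.default_rng(0)
n = 32                               # image side length (assumed)
m = int(0.5 * n * n)                 # sampling ratio 50%, assumed

# ground-truth scene: a bright square on a dark background (stand-in object)
x = np.zeros((n, n))
x[10:22, 12:24] = 1.0
x = x.ravel()

# SVD-based illumination patterns: orthonormal rows taken from the right singular
# vectors of a random matrix (the paper additionally binarizes them by dithering)
_, _, vt = np.linalg.svd(rng.standard_normal((m, n * n)), full_matrices=False)
patterns = vt[:m]                    # shape (m, n*n), orthonormal rows

bucket = patterns @ x                # single-pixel (bucket) measurements
recon = patterns.T @ bucket          # back-projection = projection onto the pattern subspace

print("relative error:", np.linalg.norm(recon - x) / np.linalg.norm(x))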
arXiv:2302.05726 [pdf, other] eess.SY (https://arxiv.org/abs/2302.05726)
Enhance Local Consistency in Federated Learning: A Multi-Step Inertial Momentum Approach
Authors: Yixing Liu, Yan Sun, Zhengtao Ding, Li Shen, Bo Liu, Dacheng Tao
Abstract: Federated learning (FL), as a collaborative distributed training paradigm with several edge computing devices under the coordination of a centralized server, is plagued by inconsistent local stationary points due to the heterogeneity of the partially participating local clients. This precipitates local client drift and leads to unstable and slow convergence, especially on strongly heterogeneous datasets. To address these issues, we propose a novel federated learning algorithm, named FedMIM, which adopts multi-step inertial momentum on the edge devices and enhances local consistency during training at no extra cost, improving robustness to heterogeneity. Specifically, we incorporate weighted global gradient estimations as inertial correction terms to guide both the local iterates and the stochastic gradient estimation, which naturally accounts for the global objective on the edges' heterogeneous datasets and keeps the local iterations consistent. Theoretically, we show that FedMIM achieves an $\mathcal{O}(1/\sqrt{SKT})$ convergence rate with a linear speedup with respect to the number of selected clients $S$ and the local interval $K$ over communication rounds $T$, without a convexity assumption. Empirically, we conduct comprehensive experiments on various real-world datasets and demonstrate the efficacy of the proposed FedMIM against several state-of-the-art baselines.
Submitted 11 February, 2023; originally announced February 2023.
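The exact FedMIM update is defined in the paper and is not reproduced here; the following is only a schematic sketch of the general idea of correcting local stochastic gradients with a weighted sum of past global update directions, on a toy quadratic objective. The inertial weight, step sizes, and the form of the correction are assumptions for illustration.

import numpy as np

rng = np.random.default_rng(0)
d, K, lr, beta = 5, 4, 0.1, 0.3       # dimension, local steps, step size, inertial weight (assumed)

def local_grad(w, client_shift):
    """Toy heterogeneous objective per client: 0.5 * ||w - shift||^2."""
    return w - client_shift

def client_update(w_global, inertial_dirs, client_shift):
    """Schematic multi-step local update: each stochastic gradient is corrected by a
    weighted sum of recent global update directions (the 'inertial' terms)."""
    correction = sum(beta ** (i + 1) * g for i, g in enumerate(inertial_dirs))
    w = w_global.copy()
    for _ in range(K):
        g = local_grad(w, client_shift) + rng.normal(0, 0.01, d)   # noisy local gradient
        w -= lr * (g + correction)
    return w

# one communication round with two heterogeneous clients
w = np.zeros(d)
history = [np.zeros(d), np.zeros(d)]            # past global update directions (empty start)
shifts = [np.ones(d), -0.5 * np.ones(d)]
local_models = [client_update(w, history, s) for s in shifts]
w_new = np.mean(local_models, axis=0)           # server aggregation (FedAvg-style, assumed)
print(w_new)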
arXiv:2301.00657 [pdf, other] eess.AS cs.AI cs.CL (https://arxiv.org/abs/2301.00657)
MnTTS2: An Open-Source Multi-Speaker Mongolian Text-to-Speech Synthesis Dataset
Authors: Kailin Liang, Bin Liu, Yifan Hu, Rui Liu, Feilong Bao, Guanglai Gao
Abstract: Text-to-Speech (TTS) synthesis for low-resource languages is an attractive research issue in academia and industry nowadays. Mongolian is the official language of the Inner Mongolia Autonomous Region and a representative low-resource language spoken by over 10 million people worldwide. However, there is a relative lack of open-source datasets for Mongolian TTS. Therefore, we make public an open-source multi-speaker Mongolian TTS dataset, named MnTTS2, for the benefit of related researchers. In this work, we prepare transcriptions covering various topics and invite three professional Mongolian announcers to form a three-speaker TTS dataset, in which each announcer records 10 hours of Mongolian speech, resulting in 30 hours in total. Furthermore, we build a baseline system based on the state-of-the-art FastSpeech2 model and the HiFi-GAN vocoder. The experimental results suggest that the constructed MnTTS2 dataset is sufficient to build robust multi-speaker TTS models for real-world applications. The MnTTS2 dataset, training recipe, and pretrained models are released at https://github.com/ssmlkl/MnTTS2
Submitted 11 December, 2022; originally announced January 2023.
Comments: Accepted by NCMMSC'2022 (https://ncmmsc2022.ustc.edu.cn/main.htm)
class="a11y-main-link" href="https://status.arxiv.org" target="_blank">arXiv Operational Status <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 256 512" class="icon filter-dark_grey" role="presentation"><path d="M224.3 273l-136 136c-9.4 9.4-24.6 9.4-33.9 0l-22.6-22.6c-9.4-9.4-9.4-24.6 0-33.9l96.4-96.4-96.4-96.4c-9.4-9.4-9.4-24.6 0-33.9L54.3 103c9.4-9.4 24.6-9.4 33.9 0l136 136c9.5 9.4 9.5 24.6.1 34z"/></svg></a><br> Get status notifications via <a class="is-link" href="https://subscribe.sorryapp.com/24846f03/email/new" target="_blank"><svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 512 512" class="icon filter-black" role="presentation"><path d="M502.3 190.8c3.9-3.1 9.7-.2 9.7 4.7V400c0 26.5-21.5 48-48 48H48c-26.5 0-48-21.5-48-48V195.6c0-5 5.7-7.8 9.7-4.7 22.4 17.4 52.1 39.5 154.1 113.6 21.1 15.4 56.7 47.8 92.2 47.6 35.7.3 72-32.8 92.3-47.6 102-74.1 131.6-96.3 154-113.7zM256 320c23.2.4 56.6-29.2 73.4-41.4 132.7-96.3 142.8-104.7 173.4-128.7 5.8-4.5 9.2-11.5 9.2-18.9v-19c0-26.5-21.5-48-48-48H48C21.5 64 0 85.5 0 112v19c0 7.4 3.4 14.3 9.2 18.9 30.6 23.9 40.7 32.4 173.4 128.7 16.8 12.2 50.2 41.8 73.4 41.4z"/></svg>email</a> or <a class="is-link" href="https://subscribe.sorryapp.com/24846f03/slack/new" target="_blank"><svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 448 512" class="icon filter-black" role="presentation"><path d="M94.12 315.1c0 25.9-21.16 47.06-47.06 47.06S0 341 0 315.1c0-25.9 21.16-47.06 47.06-47.06h47.06v47.06zm23.72 0c0-25.9 21.16-47.06 47.06-47.06s47.06 21.16 47.06 47.06v117.84c0 25.9-21.16 47.06-47.06 47.06s-47.06-21.16-47.06-47.06V315.1zm47.06-188.98c-25.9 0-47.06-21.16-47.06-47.06S139 32 164.9 32s47.06 21.16 47.06 47.06v47.06H164.9zm0 23.72c25.9 0 47.06 21.16 47.06 47.06s-21.16 47.06-47.06 47.06H47.06C21.16 243.96 0 222.8 0 196.9s21.16-47.06 47.06-47.06H164.9zm188.98 47.06c0-25.9 21.16-47.06 47.06-47.06 25.9 0 47.06 21.16 47.06 47.06s-21.16 47.06-47.06 47.06h-47.06V196.9zm-23.72 0c0 25.9-21.16 47.06-47.06 47.06-25.9 0-47.06-21.16-47.06-47.06V79.06c0-25.9 21.16-47.06 47.06-47.06 25.9 0 47.06 21.16 47.06 47.06V196.9zM283.1 385.88c25.9 0 47.06 21.16 47.06 47.06 0 25.9-21.16 47.06-47.06 47.06-25.9 0-47.06-21.16-47.06-47.06v-47.06h47.06zm0-23.72c-25.9 0-47.06-21.16-47.06-47.06 0-25.9 21.16-47.06 47.06-47.06h117.84c25.9 0 47.06 21.16 47.06 47.06 0 25.9-21.16 47.06-47.06 47.06H283.1z"/></svg>slack</a> </p> </li> </ul> </div> </div> </div> <!-- end MetaColumn 2 --> </div> </footer> <script src="https://static.arxiv.org/static/base/1.0.0a5/js/member_acknowledgement.js"></script> </body> </html>
