Search | arXiv e-print repository

Showing 1–45 of 45 results for author: Jia, S
Searching in archive eess.

1. arXiv:2408.08883 [pdf] · eess.IV (Image and Video Processing)
   MR Optimized Reconstruction of Simultaneous Multi-Slice Imaging Using Diffusion Model
   Authors: Ting Zhao, Zhuoxu Cui, Sen Jia, Qingyong Zhu, Congcong Liu, Yihang Zhou, Yanjie Zhu, Dong Liang, Haifeng Wang
   Abstract: Diffusion models have been successfully applied to MRI reconstruction, including single- and multi-coil acquisition of MRI data. Simultaneous multi-slice (SMS) imaging, a method for accelerating MR acquisition, can significantly reduce scanning time, but further optimization of the reconstruction results is still possible. To optimize SMS reconstruction, we propose a method that uses a diffusion model based on the slice-GRAPPA and SPIRiT methods. Specifically, our method characterizes the prior distribution of SMS data by score matching and characterizes the redundant k-space prior between coils and slices based on self-consistency. Using the diffusion model, we achieved better reconstruction results. The application of the diffusion model can further reduce MRI scanning time without compromising image quality, making it more advantageous for clinical application.
   Submitted 21 August, 2024; v1 submitted 4 August, 2024; originally announced August 2024.
   Comments: Accepted as ISMRM 2024 Digital Poster 4024
   Journal ref: ISMRM 2024 Digital Poster 4024
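
   The entry ships no code, but the interplay the abstract describes, a score-matching prior steering a sampler that is also pulled toward a SPIRiT-style self-consistency condition, can be illustrated with a minimal sketch. Everything here is hypothetical: `score_fn` stands in for a trained score network and `G` for a calibrated interpolation operator, neither taken from the paper.

   ```python
   import numpy as np

   def sample_step(x, score_fn, G, lam=0.1, step=1e-3, rng=None):
       # One annealed-Langevin-style update: the learned score (the prior
       # from score matching) plus a gradient step toward the SPIRiT-style
       # self-consistency condition x = G x. Both inputs are stand-ins.
       rng = rng or np.random.default_rng()
       eye = np.eye(x.shape[0])
       resid = G @ x - x                         # self-consistency residual
       grad = score_fn(x) - lam * (G - eye).conj().T @ resid
       noise = rng.standard_normal(x.shape)
       return x + step * grad + np.sqrt(2.0 * step) * noise

   # Toy usage: standard-normal prior score, small random coupling operator.
   d = 8
   G = 0.1 * np.random.default_rng(0).standard_normal((d, d))
   x = np.zeros(d)
   for _ in range(100):
       x = sample_step(x, score_fn=lambda v: -v, G=G)
   ```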

2. arXiv:2402.02704 [pdf] · eess.IV (Image and Video Processing)
   Knowledge-driven deep learning for fast MR imaging: undersampled MR image reconstruction from supervised to un-supervised learning
   Authors: Shanshan Wang, Ruoyou Wu, Sen Jia, Alou Diakite, Cheng Li, Qiegen Liu, Leslie Ying
   Abstract: Deep learning (DL) has emerged as a leading approach to accelerating MR imaging. It employs deep neural networks to extract knowledge from available datasets and then applies the trained networks to reconstruct accurate images from limited measurements. Unlike natural image restoration problems, MR imaging involves physics-based imaging processes, unique data properties, and diverse imaging tasks. This domain knowledge needs to be integrated with data-driven approaches. Our review introduces the significant challenges faced by such knowledge-driven DL approaches in the context of fast MR imaging, along with several notable solutions, covering both the learned networks themselves and different imaging application scenarios. We also trace the traits and trends of these techniques, which have shifted from supervised learning to semi-supervised learning and, finally, to unsupervised learning methods. In addition, we survey MR vendors' choices of DL reconstruction and discuss open questions and future directions, which are critical for reliable imaging systems.
   Submitted 4 February, 2024; originally announced February 2024.
   Comments: 46 pages, 5 figures, 1 table

3. arXiv:2310.03559 [pdf, other] · eess.IV (Image and Video Processing); cs.CV (Computer Vision and Pattern Recognition)
   MedSyn: Text-guided Anatomy-aware Synthesis of High-Fidelity 3D CT Images
   Authors: Yanwu Xu, Li Sun, Wei Peng, Shuyue Jia, Katelyn Morrison, Adam Perer, Afrooz Zandifar, Shyam Visweswaran, Motahhare Eslami, Kayhan Batmanghelich
   Abstract: This paper introduces an innovative methodology for producing high-quality 3D lung CT images guided by textual information. While diffusion-based generative models are increasingly used in medical imaging, current state-of-the-art approaches are limited to low-resolution outputs and underutilize the abundant information in radiology reports. Radiology reports can enhance the generation process by providing additional guidance and offering fine-grained control over the synthesis of images. Nevertheless, expanding text-guided generation to high-resolution 3D images poses significant challenges in memory and in preserving anatomical detail. To address the memory issue, we introduce a hierarchical scheme that uses a modified UNet architecture: we start by synthesizing low-resolution images conditioned on the text, which serve as a foundation for subsequent generators producing complete volumetric data. To ensure the anatomical plausibility of the generated samples, we provide further guidance by generating vascular, airway, and lobular segmentation masks in conjunction with the CT images. The model demonstrates the capability to use textual input and segmentation tasks to generate synthesized images. Comparative assessments indicate that our approach outperforms the most advanced GAN- and diffusion-based models, especially in accurately retaining crucial anatomical features such as fissure lines, airways, and vascular structures. This study focuses on two main objectives: (1) the development of a method for creating images based on textual prompts and anatomical components, and (2) the capability to generate new images conditioned on anatomical elements. The advancements in image generation can be applied to enhance numerous downstream tasks.
   Submitted 15 October, 2024; v1 submitted 5 October, 2023; originally announced October 2023.
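
   As a rough illustration of the cascade the abstract outlines (a low-resolution stage conditioned on text, followed by a generator that refines toward the full volume), here is a minimal PyTorch-style sketch; the module names, layer choices, and shapes are invented for illustration and are not taken from the paper.

   ```python
   import torch
   import torch.nn as nn

   class LowResStage(nn.Module):
       # Hypothetical stage 1: map a text embedding to a coarse 3D volume.
       def __init__(self, text_dim=128, coarse=16):
           super().__init__()
           self.coarse = coarse
           self.fc = nn.Linear(text_dim, coarse ** 3)

       def forward(self, text_emb):
           b = text_emb.shape[0]
           c = self.coarse
           return self.fc(text_emb).view(b, 1, c, c, c)

   class RefineStage(nn.Module):
       # Hypothetical stage 2: upsample the coarse volume toward full
       # resolution, re-conditioned on the text via a simple bias.
       def __init__(self, text_dim=128):
           super().__init__()
           self.up = nn.Sequential(
               nn.Upsample(scale_factor=2, mode="trilinear", align_corners=False),
               nn.Conv3d(1, 8, 3, padding=1), nn.ReLU(),
               nn.Conv3d(8, 1, 3, padding=1),
           )
           self.bias = nn.Linear(text_dim, 1)

       def forward(self, coarse, text_emb):
           return self.up(coarse) + self.bias(text_emb).view(-1, 1, 1, 1, 1)

   text = torch.randn(2, 128)
   vol = RefineStage()(LowResStage()(text), text)   # (2, 1, 32, 32, 32)
   ```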

4. arXiv:2310.00240 [pdf, other] · cs.CV (Computer Vision and Pattern Recognition); eess.IV (Image and Video Processing)
   Learning Mask-aware CLIP Representations for Zero-Shot Segmentation
   Authors: Siyu Jiao, Yunchao Wei, Yaowei Wang, Yao Zhao, Humphrey Shi
   Abstract: Recently, pre-trained vision-language models have been increasingly used to tackle the challenging zero-shot segmentation task. Typical solutions follow the paradigm of first generating mask proposals and then adopting CLIP to classify them. To maintain CLIP's zero-shot transferability, previous practices favour freezing CLIP during training. However, in this paper we reveal that CLIP is insensitive to different mask proposals and tends to produce similar predictions for various mask proposals of the same image. This insensitivity results in numerous false positives when classifying mask proposals, and it mainly stems from the fact that CLIP is trained with image-level supervision. To alleviate this issue, we propose a simple yet effective method, named Mask-aware Fine-tuning (MAFT). Specifically, an Image-Proposals CLIP Encoder (IP-CLIP Encoder) is proposed to handle arbitrary numbers of image and mask proposals simultaneously. Then, a mask-aware loss and a self-distillation loss are designed to fine-tune the IP-CLIP Encoder, ensuring CLIP is responsive to different mask proposals without sacrificing transferability. In this way, mask-aware representations can be easily learned, making the true positives stand out. Notably, our solution can seamlessly plug into most existing methods without introducing any new parameters during the fine-tuning process. We conduct extensive experiments on the popular zero-shot benchmarks. With MAFT, the performance of state-of-the-art methods improves by a large margin: 50.4% (+8.2%) on COCO, 81.8% (+3.2%) on Pascal-VOC, and 8.7% (+4.3%) on ADE20K in terms of mIoU for unseen classes. The code is available at https://github.com/jiaosiyu1999/MAFT.git.
   Submitted 29 September, 2023; originally announced October 2023.
   Comments: NeurIPS 2023
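
   The abstract names two objectives but not their form; one plausible rendering, purely as a guess at their shape, pairs an IoU-supervised proposal score (mask-aware) with a KL term toward the frozen CLIP (self-distillation). Every tensor name below is hypothetical; for the actual losses see the linked repository.

   ```python
   import torch
   import torch.nn.functional as F

   def maft_style_losses(scores_ft, scores_frozen, proposal_ious):
       # Speculative sketch of the two objectives the abstract names.
       # scores_ft:     (P, C) class logits from the fine-tuned encoder
       # scores_frozen: (P, C) logits from frozen CLIP (the teacher)
       # proposal_ious: (P,) overlap of each mask proposal with ground
       #                truth, used as a soft target so good masks score high.
       probs = scores_ft.softmax(dim=-1)
       mask_aware = F.binary_cross_entropy(probs.max(dim=-1).values,
                                           proposal_ious)
       self_distill = F.kl_div(scores_ft.log_softmax(dim=-1),
                               scores_frozen.softmax(dim=-1),
                               reduction="batchmean")
       return mask_aware + self_distill

   loss = maft_style_losses(torch.randn(5, 10), torch.randn(5, 10),
                            torch.rand(5))
   ```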

5. arXiv:2308.03953 [pdf] · cs.LG (Machine Learning); eess.SY (Systems and Control)
   PMU measurements based short-term voltage stability assessment of power systems via deep transfer learning
   Authors: Yang Li, Shitu Zhang, Yuanzheng Li, Jiting Cao, Shuyue Jia
   Abstract: Deep learning has emerged as an effective solution for addressing the challenges of short-term voltage stability assessment (STVSA) in power systems. However, existing deep learning-based STVSA approaches face limitations in adapting to topological changes, labeling samples, and handling small datasets. To overcome these challenges, this paper proposes a novel phasor measurement unit (PMU) measurements-based STVSA method using deep transfer learning. The method leverages the real-time dynamic information captured by PMUs to create an initial dataset. It employs temporal ensembling for sample labeling and least squares generative adversarial networks (LSGAN) for data augmentation, enabling effective deep learning on small-scale datasets. Additionally, the method enhances adaptability to topological changes by exploring connections between different faults. Experimental results on the IEEE 39-bus test system demonstrate that the proposed method improves model evaluation accuracy by approximately 20% through transfer learning and exhibits strong adaptability to topological changes. Leveraging the self-attention mechanism of the Transformer model, this approach offers significant advantages over shallow learning methods and other deep learning-based approaches.
   Submitted 27 August, 2023; v1 submitted 7 August, 2023; originally announced August 2023.
   Comments: Accepted by IEEE Transactions on Instrumentation & Measurement
   Journal ref: IEEE Transactions on Instrumentation and Measurement 72 (2023) 2526111
   DOI: 10.1109/TIM.2023.3311065
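
   Temporal ensembling, the labeling device the abstract mentions, maintains an exponential moving average of per-sample predictions with a startup bias correction. A minimal sketch of the standard update (Laine & Aila, 2017), independent of this paper's specifics:

   ```python
   import numpy as np

   def temporal_ensemble_update(Z, preds, epoch, alpha=0.6):
       # Standard temporal-ensembling update: accumulate an EMA of
       # per-sample predictions, then correct the startup bias so early
       # epochs are not pulled toward zero.
       Z = alpha * Z + (1.0 - alpha) * preds
       targets = Z / (1.0 - alpha ** (epoch + 1))   # bias correction
       return Z, targets

   rng = np.random.default_rng(0)
   Z = np.zeros((100, 2))                  # 100 samples, 2 classes
   for epoch in range(5):
       preds = rng.random((100, 2))        # stand-in network outputs
       Z, soft_labels = temporal_ensemble_update(Z, preds, epoch)
   ```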

6. arXiv:2307.14491 [pdf, other] · cs.MM (Multimedia); cs.SD (Sound); eess.AS (Audio and Speech Processing)
   A Unified Framework for Modality-Agnostic Deepfakes Detection
   Authors: Cai Yu, Peng Chen, Jiahe Tian, Jin Liu, Jiao Dai, Xi Wang, Yesheng Chai, Shan Jia, Siwei Lyu, Jizhong Han
   Abstract: As AI-generated content (AIGC) thrives, deepfakes have expanded from single-modality falsification to cross-modal fake content creation, where either audio or visual components can be manipulated. While using two unimodal detectors can detect audio-visual deepfakes, cross-modal forgery clues could be overlooked. Existing multimodal deepfake detection methods typically establish correspondence between the audio and visual modalities for binary real/fake classification and require the co-occurrence of both modalities. However, in real-world multimodal applications, missing-modality scenarios may occur in which either modality is unavailable. In such cases, audio-visual detection methods are less practical than two independent unimodal methods. Consequently, the detector cannot always know the number or type of manipulated modalities beforehand, necessitating a fake-modality-agnostic audio-visual detector. In this work, we introduce a comprehensive framework that is agnostic to fake modalities: it facilitates the identification of multimodal deepfakes and handles missing-modality situations, regardless of whether the manipulations are embedded in audio, video, or even cross-modal forms. To enhance the modeling of cross-modal forgery clues, we employ audio-visual speech recognition (AVSR) as a preliminary task, which efficiently extracts speech correlations across modalities, a feature that is challenging for deepfakes to replicate. Additionally, we propose a dual-label detection approach that follows the structure of AVSR to support the independent detection of each modality. Extensive experiments on three audio-visual datasets show that our scheme outperforms state-of-the-art detection methods, with promising performance on modality-agnostic audio/video deepfakes.
   Submitted 24 October, 2023; v1 submitted 26 July, 2023; originally announced July 2023.
   Comments: This work has been submitted to the IEEE for possible publication
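
   A toy rendering of the dual-label idea, a separate encoder and real/fake head per modality so each stream can be judged even when the other is missing. All names and dimensions below are illustrative, not taken from the paper, and the AVSR pretext task is omitted.

   ```python
   import torch
   import torch.nn as nn

   class DualLabelDetector(nn.Module):
       # Illustrative dual-label design: one binary real/fake head per
       # modality, so audio and video are detected independently (useful
       # when only one modality is manipulated or present).
       def __init__(self, dim=64):
           super().__init__()
           self.audio_enc = nn.Linear(40, dim)    # stand-in encoders
           self.video_enc = nn.Linear(512, dim)
           self.audio_head = nn.Linear(dim, 1)
           self.video_head = nn.Linear(dim, 1)

       def forward(self, audio=None, video=None):
           out = {}
           if audio is not None:
               out["audio_fake_logit"] = self.audio_head(self.audio_enc(audio))
           if video is not None:
               out["video_fake_logit"] = self.video_head(self.video_enc(video))
           return out

   det = DualLabelDetector()
   print(det(audio=torch.randn(2, 40)))   # video missing: still works
   ```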

7. arXiv:2306.14276 [pdf, other] · cs.SD (Sound); eess.AS (Audio and Speech Processing)
   Aeroacoustic Source Localization
   Authors: Weicheng Xue, Bing Yang, Shaohong Jia
   Abstract: The deconvolutional DAMAS algorithm can effectively eliminate the artifacts of the commonly used beamforming localization algorithm, allowing more accurate calculation of the source location as well as its intensity. By solving a linear system of equations, the DAMAS algorithm takes into account the mutual influence of different locations, reducing or even eliminating sidelobes and producing more accurate results. This work first introduces the principles of the DAMAS algorithm. It then applies both the beamforming algorithm and the DAMAS algorithm to simulate the localization of a single-frequency source from a 1.5 MW wind turbine, a complex line source spelling the text "UCAS", and a line source downstream of an airfoil trailing edge. Finally, the work presents experimental localization results for the source of a 1.5 MW wind turbine using both the beamforming algorithm and the DAMAS algorithm.
   Submitted 5 July, 2023; v1 submitted 25 June, 2023; originally announced June 2023.
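
   DAMAS recovers source strengths x from the beamforming map b by solving b = A x, where A collects the array point-spread functions; the classical solver is a Gauss-Seidel sweep with a nonnegativity clamp. A compact sketch on synthetic data (the PSF matrix here is fabricated purely for illustration):

   ```python
   import numpy as np

   def damas(A, b, sweeps=200):
       # Classical DAMAS iteration: Gauss-Seidel on A x = b with x >= 0,
       # so sidelobe energy is reassigned to the true source locations.
       n = len(b)
       x = np.zeros(n)
       for _ in range(sweeps):
           for i in range(n):
               r = b[i] - A[i, :i] @ x[:i] - A[i, i + 1:] @ x[i + 1:]
               x[i] = max(r / A[i, i], 0.0)
       return x

   # Synthetic demo: two point sources blurred by a fabricated PSF matrix.
   n = 30
   d2 = np.subtract.outer(np.arange(n), np.arange(n)) ** 2
   A = np.eye(n) + 0.2 * np.exp(-0.5 * d2 / 4.0)
   x_true = np.zeros(n); x_true[[8, 20]] = [1.0, 0.5]
   b = A @ x_true
   x_hat = damas(A, b)        # peaks recovered at indices 8 and 20
   ```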

8. arXiv:2304.13085 [pdf, other] · cs.SD (Sound); cs.MM (Multimedia); eess.AS (Audio and Speech Processing)
   AI-Synthesized Voice Detection Using Neural Vocoder Artifacts
   Authors: Chengzhe Sun, Shan Jia, Shuwei Hou, Siwei Lyu
   Abstract: Advancements in AI-synthesized human voices have created a growing threat of impersonation and disinformation, making it crucial to develop methods for detecting synthetic human voices. This study proposes a new approach to identifying synthetic human voices by detecting vocoder artifacts in audio signals. Most DeepFake audio synthesis models use a neural vocoder, a neural network that generates waveforms from temporal-frequency representations such as mel-spectrograms. By identifying neural vocoder processing in audio, we can determine whether a sample was synthesized. To detect synthetic human voices, we introduce a multi-task learning framework for a binary-class RawNet2 model that shares its feature extractor with a vocoder identification module. By treating vocoder identification as a pretext task, we constrain the feature extractor to focus on vocoder artifacts and provide discriminative features for the final binary classifier. Our experiments show that the improved RawNet2 model based on vocoder identification achieves high overall classification performance on the binary task.
   Submitted 27 April, 2023; v1 submitted 25 April, 2023; originally announced April 2023.
   Comments: Paper accepted in CVPRW 2023. Codes and data can be found at https://github.com/csun22/Synthetic-Voice-Detection-Vocoder-Artifacts. arXiv admin note: substantial text overlap with arXiv:2302.09198
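
   The multi-task shape the abstract describes, one shared feature extractor feeding a binary real/fake head and a multi-class vocoder-identification head, is easy to sketch. This toy version uses a generic 1-D conv stack rather than the actual RawNet2 backbone, and the layer sizes are invented; the linked repository has the real model.

   ```python
   import torch
   import torch.nn as nn

   class SharedExtractorDetector(nn.Module):
       # Toy multi-task model: shared front end over raw waveforms, a
       # binary real/fake head, and a vocoder-ID head as the pretext task.
       def __init__(self, num_vocoders=6):
           super().__init__()
           self.extractor = nn.Sequential(
               nn.Conv1d(1, 16, kernel_size=9, stride=4), nn.ReLU(),
               nn.Conv1d(16, 32, kernel_size=9, stride=4), nn.ReLU(),
               nn.AdaptiveAvgPool1d(1), nn.Flatten(),
           )
           self.fake_head = nn.Linear(32, 1)                # real vs. fake
           self.vocoder_head = nn.Linear(32, num_vocoders)  # pretext task

       def forward(self, wav):                              # wav: (B, 1, T)
           feat = self.extractor(wav)
           return self.fake_head(feat), self.vocoder_head(feat)

   model = SharedExtractorDetector()
   fake_logit, vocoder_logits = model(torch.randn(4, 1, 16000))
   # Training would sum a binary loss and a vocoder-ID cross-entropy,
   # letting the pretext task shape the shared features.
   ```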

9. arXiv:2303.02722 [pdf, other] · cs.IT (Information Theory); eess.SP (Signal Processing)
   Performance of OTFS-NOMA Scheme for Coordinated Direct and Relay Transmission Networks in High-Mobility Scenarios
   Authors: Yao Xu, Zhen Du, Weijie Yuan, Shaobo Jia, Victor C. M. Leung
   Abstract: In this letter, an orthogonal time frequency space (OTFS) based non-orthogonal multiple access (NOMA) scheme is investigated for a coordinated direct and relay transmission system, in which a source communicates directly with a high-mobility near user and needs relaying assistance to serve the far user, which also has high mobility. Due to the coexistence of signal superposition coding and multi-domain transformation, the performance of OTFS-based NOMA is usually challenging to characterize theoretically. To accurately evaluate the system performance of the proposed scheme, we derive closed-form expressions for the outage probability and the outage sum rate by using the inversion formula and the characteristic function. Numerical results verify the performance superiority and the effectiveness of the proposed scheme.
   Submitted 5 March, 2023; originally announced March 2023.
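
   The "inversion formula" the abstract leans on is presumably the Gil-Pelaez theorem, which recovers a distribution function from its characteristic function; an outage probability of the form P(X <= x) is exactly such a CDF value. For reference:

   ```latex
   % Gil-Pelaez inversion: CDF of X from its characteristic function \varphi_X.
   % An outage probability P(SNR <= threshold) is exactly such a CDF value.
   F_X(x) \;=\; \Pr(X \le x)
          \;=\; \frac{1}{2} \;-\; \frac{1}{\pi}\int_{0}^{\infty}
                \frac{\operatorname{Im}\!\left[e^{-\mathrm{j}tx}\,\varphi_X(t)\right]}{t}\,\mathrm{d}t
   ```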

10. arXiv:2302.09198 [pdf, other] · cs.SD (Sound); cs.MM (Multimedia); eess.AS (Audio and Speech Processing)
    Exposing AI-Synthesized Human Voices Using Neural Vocoder Artifacts
    Authors: Chengzhe Sun, Shan Jia, Shuwei Hou, Ehab AlBadawy, Siwei Lyu
    Abstract: The advancements of AI-synthesized human voices have introduced a growing threat of impersonation and disinformation. It is therefore of practical importance to develop detection methods for synthetic human voices. This work proposes a new approach to detecting synthetic human voices based on identifying artifacts of neural vocoders in audio signals. A neural vocoder is a specially designed neural network that synthesizes waveforms from temporal-frequency representations, e.g., mel-spectrograms, and is a core component of most DeepFake audio synthesis models. Hence, the identification of neural vocoder processing implies that an audio sample may have been synthesized. To take advantage of vocoder artifacts for synthetic human voice detection, we introduce a multi-task learning framework for a binary-class RawNet2 model that shares its front-end feature extractor with a vocoder identification module. We treat vocoder identification as a pretext task that constrains the front-end feature extractor to focus on vocoder artifacts and provide discriminative features for the final binary classifier. Our experiments show that the improved RawNet2 model based on vocoder identification achieves overall high classification performance on the binary task.
    Submitted 27 April, 2023; v1 submitted 17 February, 2023; originally announced February 2023.
    Comments: Dataset and codes will be available at https://github.com/csun22/LibriVoc-Dataset

11. arXiv:2212.11274 [pdf, other] · eess.IV (Image and Video Processing); cs.CV (Computer Vision and Pattern Recognition)
    SPIRiT-Diffusion: SPIRiT-driven Score-Based Generative Modeling for Vessel Wall imaging
    Authors: Chentao Cao, Zhuo-Xu Cui, Jing Cheng, Sen Jia, Hairong Zheng, Dong Liang, Yanjie Zhu
    Abstract: The diffusion model is the most advanced method in image generation and has been successfully applied to MRI reconstruction. However, existing methods do not consider the characteristics of multi-coil acquisition of MRI data. We therefore present a new diffusion model, called SPIRiT-Diffusion, based on the SPIRiT iterative reconstruction algorithm. Specifically, SPIRiT-Diffusion characterizes the prior distribution of coil-by-coil images by score matching and characterizes the redundant k-space prior between coils based on self-consistency. With sufficient prior constraints utilized, we achieve superior reconstruction results on a joint intracranial and carotid vessel wall imaging dataset.
    Submitted 13 December, 2022; originally announced December 2022.
    Comments: submitted to ISMRM
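
    For context, the self-consistency that SPIRiT enforces is the calibration-learned condition that every k-space sample is reproduced by a linear combination of its neighbors across all coils, balanced against fidelity to the acquired data. In the usual notation (x the full multi-coil k-space, G the calibrated interpolation kernels, D the sampling operator, y the acquired samples), the standard SPIRiT formulation reads:

    ```latex
    % SPIRiT: self-consistency objective with a data-fidelity constraint.
    \hat{x} \;=\; \arg\min_{x}\; \lVert (G - I)\,x \rVert_2^2
            \quad\text{s.t.}\quad \lVert D x - y \rVert_2 \le \varepsilon
    ```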
The simulated cognitive phenomena include the cocktail party effect and the McGurk effect, which have been widely discussed by researchers. Our M-SNN is constituted by a meta operator called the network motif. The topologies of the 3-node network motifs are pre-learned from spatial or temporal datasets. In the single-sensory classification task, the results showed that the accuracy of the M-SNN using network motif topologies was higher than that of a pure feedforward network topology without them. In the multi-sensory integration task, the performance of the M-SNN using artificial network motifs was better than that of the state-of-the-art SNN using BRP (biologically-plausible reward propagation). Furthermore, the M-SNN could better simulate the cocktail party effect and the McGurk effect with lower computational cost. We think the artificial network motifs can be considered prior knowledge that contributes to the multi-sensory integration of SNNs and provides further benefits for simulating cognitive phenomena. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2211.07641v1-abstract-full').style.display = 'none'; document.getElementById('2211.07641v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 12 November, 2022; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> November 2022. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2209.12253">arXiv:2209.12253</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2209.12253">pdf</a>, <a href="https://arxiv.org/ps/2209.12253">ps</a>, <a href="https://arxiv.org/format/2209.12253">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Signal Processing">eess.SP</span> </div> </div> <p class="title is-5 mathjax"> Joint Robust Beamforming Design for WPT-assisted D2D Communications in MISO-NOMA: Fractional Programming and Deep Reinforcement Learning </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&amp;query=Jiao%2C+S">Shiyu Jiao</a>, <a href="/search/eess?searchtype=author&amp;query=Fang%2C+F">Fang Fang</a>, <a href="/search/eess?searchtype=author&amp;query=Ding%2C+Z">Zhiguo Ding</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2209.12253v3-abstract-short" style="display: inline;"> This paper proposes a scheme for the envisioned sixth-generation (6G) ultra-massive Machine Type Communications (umMTC). In particular, wireless power transfer (WPT) assisted communication is deployed in non-orthogonal multiple access (NOMA) downlink networks to realize spectrum and energy cooperation.
This paper focuses on joint robust beamforming design to maximize the energy efficiency of WPT-as&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2209.12253v3-abstract-full').style.display = 'inline'; document.getElementById('2209.12253v3-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2209.12253v3-abstract-full" style="display: none;"> This paper proposes a scheme for the envisioned sixth-generation (6G) ultra-massive Machine Type Communications (umMTC). In particular, wireless power transfer (WPT) assisted communication is deployed in non-orthogonal multiple access (NOMA) downlink networks to realize spectrum and energy cooperation. This paper focuses on joint robust beamforming design to maximize the energy efficiency of WPT-assisted D2D communications in multiple-input single-output (MISO)-NOMA downlink networks. To efficiently address the formulated non-concave energy efficiency maximization problem, a pure fractional programming (PFP) algorithm is proposed, where the time switching coefficient of the WPT device and the beamforming vectors of the base station are alternately optimized by applying the Dinkelbach method and the quadratic transform, respectively. To prove the optimality of the proposed algorithm, the partial exhaustive search algorithm is proposed as a benchmark. A deep reinforcement learning (DRL)-based method is also applied to directly solve the non-concave problem. The proposed PFP algorithm and the DDPG-based algorithm are compared in the presence of different channel estimation errors. Simulation results show that the proposed PFP algorithm outperforms the DDPG-based algorithm if perfect channel state information (CSI) can be obtained or the CSI has only minor errors, while the DDPG-based algorithm is more robust when the channel estimation accuracy is unsatisfactory. On the other hand, one can conclude that the NOMA scheme can provide a higher energy-efficiency gain than OMA for WPT-assisted D2D communication in legacy multi-user downlink networks. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2209.12253v3-abstract-full').style.display = 'none'; document.getElementById('2209.12253v3-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 12 March, 2023; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 25 September, 2022; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> September 2022.
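<p class="is-size-7">The Dinkelbach method named in the abstract above reduces a fractional objective to a sequence of subtractive subproblems; a minimal sketch, assuming a caller-supplied inner solver (all names illustrative, not the paper's implementation):</p> <pre><code class="language-python">
def dinkelbach(f, g, solve_inner, x0, tol=1e-6, max_iter=50):
    # Maximize the ratio f(x)/g(x) with g(x) positive. solve_inner(lmbda)
    # must return argmax_x of f(x) - lmbda * g(x); in the paper's setting
    # that inner problem is handled with the quadratic transform, here it
    # is an abstract oracle supplied by the caller.
    x = x0
    lmbda = f(x) / g(x)
    for _ in range(max_iter):
        x = solve_inner(lmbda)
        gap = f(x) - lmbda * g(x)   # tends to zero at the optimum
        if abs(gap) > tol:
            lmbda = f(x) / g(x)     # classic Dinkelbach update
        else:
            break
    return x, lmbda
</code></pre>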
</p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2209.10451">arXiv:2209.10451</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2209.10451">pdf</a>, <a href="https://arxiv.org/format/2209.10451">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Image and Video Processing">eess.IV</span> </div> </div> <p class="title is-5 mathjax"> Learning from Mixed Datasets: A Monotonic Image Quality Assessment Model </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&amp;query=Feng%2C+Z">Zhaopeng Feng</a>, <a href="/search/eess?searchtype=author&amp;query=Zhang%2C+K">Keyang Zhang</a>, <a href="/search/eess?searchtype=author&amp;query=Jia%2C+S">Shuyue Jia</a>, <a href="/search/eess?searchtype=author&amp;query=Chen%2C+B">Baoliang Chen</a>, <a href="/search/eess?searchtype=author&amp;query=Wang%2C+S">Shiqi Wang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2209.10451v3-abstract-short" style="display: inline;"> Deep learning based image quality assessment (IQA) models usually learn to predict image quality from a single dataset, leading the model to overfit specific scenes. To account for this, mixed datasets training can be an effective way to enhance the generalization capability of the model. However, it is nontrivial to combine different IQA datasets, as their quality evaluation criteria, score range&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2209.10451v3-abstract-full').style.display = 'inline'; document.getElementById('2209.10451v3-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2209.10451v3-abstract-full" style="display: none;"> Deep learning based image quality assessment (IQA) models usually learn to predict image quality from a single dataset, leading the model to overfit specific scenes. To account for this, mixed datasets training can be an effective way to enhance the generalization capability of the model. However, it is nontrivial to combine different IQA datasets, as their quality evaluation criteria, score ranges, view conditions, as well as subjects are usually not shared during the image quality annotation. In this paper, instead of aligning the annotations, we propose a monotonic neural network for IQA model learning with different datasets combined. In particular, our model consists of a dataset-shared quality regressor and several dataset-specific quality transformers. The quality regressor aims to obtain the perceptual qualities of each dataset while each quality transformer maps the perceptual qualities to the corresponding dataset annotations with their monotonicity maintained. The experimental results verify the effectiveness of the proposed learning strategy and our code is available at https://github.com/fzp0424/MonotonicIQA. 
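<p class="is-size-7">One standard way to realize a monotonic quality transformer like the one described above is to keep an MLP's weights positive, so the map from shared perceptual quality to a dataset's annotation scale is non-decreasing by construction. A hedged PyTorch sketch, not necessarily the authors' architecture:</p> <pre><code class="language-python">
import torch
import torch.nn as nn
import torch.nn.functional as F

class MonotonicMap(nn.Module):
    # Softplus keeps every weight positive and tanh is increasing, so the
    # learned scalar-to-scalar mapping is monotonically non-decreasing.
    def __init__(self, hidden=16):
        super().__init__()
        self.w1 = nn.Parameter(torch.randn(hidden, 1))
        self.b1 = nn.Parameter(torch.zeros(hidden))
        self.w2 = nn.Parameter(torch.randn(1, hidden))
        self.b2 = nn.Parameter(torch.zeros(1))

    def forward(self, q):  # q: (batch, 1) shared perceptual quality
        h = torch.tanh(F.linear(q, F.softplus(self.w1), self.b1))
        return F.linear(h, F.softplus(self.w2), self.b2)
</code></pre>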
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2209.10451v3-abstract-full').style.display = 'none'; document.getElementById('2209.10451v3-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 12 November, 2022; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 21 September, 2022; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> September 2022. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">3 pages, 3 figures</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2208.03904">arXiv:2208.03904</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2208.03904">pdf</a>, <a href="https://arxiv.org/format/2208.03904">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Image and Video Processing">eess.IV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> SelfCoLearn: Self-supervised collaborative learning for accelerating dynamic MR imaging </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&amp;query=Zou%2C+J">Juan Zou</a>, <a href="/search/eess?searchtype=author&amp;query=Li%2C+C">Cheng Li</a>, <a href="/search/eess?searchtype=author&amp;query=Jia%2C+S">Sen Jia</a>, <a href="/search/eess?searchtype=author&amp;query=Wu%2C+R">Ruoyou Wu</a>, <a href="/search/eess?searchtype=author&amp;query=Pei%2C+T">Tingrui Pei</a>, <a href="/search/eess?searchtype=author&amp;query=Zheng%2C+H">Hairong Zheng</a>, <a href="/search/eess?searchtype=author&amp;query=Wang%2C+S">Shanshan Wang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2208.03904v1-abstract-short" style="display: inline;"> Lately, deep learning has been extensively investigated for accelerating dynamic magnetic resonance (MR) imaging, with encouraging progresses achieved. However, without fully sampled reference data for training, current approaches may have limited abilities in recovering fine details or structures. To address this challenge, this paper proposes a self-supervised collaborative learning framework (S&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2208.03904v1-abstract-full').style.display = 'inline'; document.getElementById('2208.03904v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2208.03904v1-abstract-full" style="display: none;"> Lately, deep learning has been extensively investigated for accelerating dynamic magnetic resonance (MR) imaging, with encouraging progresses achieved. However, without fully sampled reference data for training, current approaches may have limited abilities in recovering fine details or structures. 
To address this challenge, this paper proposes a self-supervised collaborative learning framework (SelfCoLearn) for accurate dynamic MR image reconstruction from undersampled k-space data. The proposed framework is equipped with three important components, namely, dual-network collaborative learning, re-undersampling data augmentation and a specially designed co-training loss. The framework can be flexibly integrated with both data-driven networks and model-based iterative unrolled networks. Our method has been evaluated on an in-vivo dataset and compared with four state-of-the-art methods. Results show that our method possesses strong capabilities in capturing essential and inherent representations for direct reconstructions from the undersampled k-space data and thus enables high-quality and fast dynamic MR imaging. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2208.03904v1-abstract-full').style.display = 'none'; document.getElementById('2208.03904v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 8 August, 2022; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> August 2022. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">22 pages, 9 figures</span> </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">ACM Class:</span> I.4.5 </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2205.03663">arXiv:2205.03663</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2205.03663">pdf</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Image and Video Processing">eess.IV</span> </div> </div> <p class="title is-5 mathjax"> Playing Tic-Tac-Toe Games with Intelligent Single-pixel Imaging </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&amp;query=Jiao%2C+S">Shuming Jiao</a>, <a href="/search/eess?searchtype=author&amp;query=Li%2C+J">Jiaxiang Li</a>, <a href="/search/eess?searchtype=author&amp;query=Huang%2C+W">Wei Huang</a>, <a href="/search/eess?searchtype=author&amp;query=Zhang%2C+Z">Zibang Zhang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2205.03663v1-abstract-short" style="display: inline;"> Single-pixel imaging (SPI) is a novel optical imaging technique that replaces a two-dimensional pixelated sensor with a single-pixel detector and pattern illuminations. SPI has been extensively used for various tasks related to image acquisition and processing. In this work, a novel non-image-based task of playing Tic-Tac-Toe games interactively is merged into the framework of SPI.
An optoelectron&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2205.03663v1-abstract-full').style.display = 'inline'; document.getElementById('2205.03663v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2205.03663v1-abstract-full" style="display: none;"> Single-pixel imaging (SPI) is a novel optical imaging technique that replaces a two-dimensional pixelated sensor with a single-pixel detector and pattern illuminations. SPI has been extensively used for various tasks related to image acquisition and processing. In this work, a novel non-image-based task of playing Tic-Tac-Toe games interactively is merged into the framework of SPI. An optoelectronic artificial intelligence (AI) player with minimal digital computation can detect the game states, generate optimal moves and display output results mainly by pattern illumination and single-pixel detection. Simulated and experimental results demonstrate the feasibility of the proposed scheme and its unbeatable performance against human players. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2205.03663v1-abstract-full').style.display = 'none'; document.getElementById('2205.03663v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 7 May, 2022; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> May 2022. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2112.09891">arXiv:2112.09891</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2112.09891">pdf</a>, <a href="https://arxiv.org/format/2112.09891">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Image and Video Processing">eess.IV</span> </div> </div> <p class="title is-5 mathjax"> Equilibrated Zeroth-Order Unrolled Deep Networks for Accelerated MRI </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&amp;query=Cui%2C+Z">Zhuo-Xu Cui</a>, <a href="/search/eess?searchtype=author&amp;query=Cheng%2C+J">Jing Cheng</a>, <a href="/search/eess?searchtype=author&amp;query=Zhu%2C+Q">Qingyong Zhu</a>, <a href="/search/eess?searchtype=author&amp;query=Liu%2C+Y">Yuanyuan Liu</a>, <a href="/search/eess?searchtype=author&amp;query=Jia%2C+S">Sen Jia</a>, <a href="/search/eess?searchtype=author&amp;query=Zhao%2C+K">Kankan Zhao</a>, <a href="/search/eess?searchtype=author&amp;query=Ke%2C+Z">Ziwen Ke</a>, <a href="/search/eess?searchtype=author&amp;query=Huang%2C+W">Wenqi Huang</a>, <a href="/search/eess?searchtype=author&amp;query=Wang%2C+H">Haifeng Wang</a>, <a href="/search/eess?searchtype=author&amp;query=Zhu%2C+Y">Yanjie Zhu</a>, <a href="/search/eess?searchtype=author&amp;query=Liang%2C+D">Dong Liang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2112.09891v2-abstract-short" style="display: inline;"> Recently, model-driven deep learning unrolls a certain iterative algorithm of a regularization model into a cascade network by replacing the first-order information
(i.e., (sub)gradient or proximal operator) of the regularizer with a network module, which appears more explainable and predictable compared to common data-driven networks. Conversely, in theory, there is not necessarily such a functio&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2112.09891v2-abstract-full').style.display = 'inline'; document.getElementById('2112.09891v2-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2112.09891v2-abstract-full" style="display: none;"> Recently, model-driven deep learning unrolls a certain iterative algorithm of a regularization model into a cascade network by replacing the first-order information (i.e., (sub)gradient or proximal operator) of the regularizer with a network module, which appears more explainable and predictable compared to common data-driven networks. Conversely, in theory, there is not necessarily such a functional regularizer whose first-order information matches the replaced network module, which means the network output may not be covered by the original regularization model. Moreover, up to now, there is also no theory to guarantee the global convergence and robustness (regularity) of unrolled networks under realistic assumptions. To bridge this gap, this paper proposes a safeguarded methodology for network unrolling. Specifically, focusing on accelerated MRI, we unroll a zeroth-order algorithm, of which the network module represents the regularizer itself, so that the network output can still be covered by the regularization model. Furthermore, inspired by the idea of deep equilibrium models, before backpropagating, we run the unrolled iterative network until it converges to a fixed point to ensure convergence. In case the measurement data contains noise, we prove that the proposed network is robust against noisy interference. Finally, numerical experiments show that the proposed network consistently outperforms the state-of-the-art MRI reconstruction methods including traditional regularization methods and other deep learning methods. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2112.09891v2-abstract-full').style.display = 'none'; document.getElementById('2112.09891v2-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 22 December, 2021; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 18 December, 2021; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> December 2021.
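<p class="is-size-7">The "run the unrolled network to a fixed point before backpropagating" step described above can be sketched as a plain fixed-point loop; f stands in for the unrolled network block (illustrative only, not the paper's implementation):</p> <pre><code class="language-python">
import numpy as np

def equilibrium_forward(f, x0, tol=1e-5, max_iter=200):
    # Apply the network block f repeatedly until successive iterates agree,
    # i.e. run the unrolled network to a fixed point, deep-equilibrium style.
    x = x0
    for _ in range(max_iter):
        x_next = f(x)
        gap = np.linalg.norm(x_next - x) / (np.linalg.norm(x) + 1e-12)
        x = x_next
        if gap < tol:
            break
    return x
</code></pre>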
</p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">11 figures</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2112.01800">arXiv:2112.01800</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2112.01800">pdf</a>, <a href="https://arxiv.org/format/2112.01800">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Image and Video Processing">eess.IV</span> </div> <div class="is-inline-block" style="margin-left: 0.5rem"> <div class="tags has-addons"> <span class="tag is-dark is-size-7">doi</span> <span class="tag is-light is-size-7"><a class="" href="https://doi.org/10.1016/j.neucom.2021.03.035">10.1016/j.neucom.2021.03.035 <i class="fa fa-external-link" aria-hidden="true"></i></a></span> </div> </div> </div> <p class="title is-5 mathjax"> A Survey: Deep Learning for Hyperspectral Image Classification with Few Labeled Samples </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&amp;query=Jia%2C+S">Sen Jia</a>, <a href="/search/eess?searchtype=author&amp;query=Jiang%2C+S">Shuguo Jiang</a>, <a href="/search/eess?searchtype=author&amp;query=Lin%2C+Z">Zhijie Lin</a>, <a href="/search/eess?searchtype=author&amp;query=Li%2C+N">Nanying Li</a>, <a href="/search/eess?searchtype=author&amp;query=Xu%2C+M">Meng Xu</a>, <a href="/search/eess?searchtype=author&amp;query=Yu%2C+S">Shiqi Yu</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2112.01800v1-abstract-short" style="display: inline;"> With the rapid development of deep learning technology and improvement in computing capability, deep learning has been widely used in the field of hyperspectral image (HSI) classification. In general, deep learning models often contain many trainable parameters and require a massive number of labeled samples to achieve optimal performance. However, in regard to HSI classification, a large number o&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2112.01800v1-abstract-full').style.display = 'inline'; document.getElementById('2112.01800v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2112.01800v1-abstract-full" style="display: none;"> With the rapid development of deep learning technology and improvement in computing capability, deep learning has been widely used in the field of hyperspectral image (HSI) classification. In general, deep learning models often contain many trainable parameters and require a massive number of labeled samples to achieve optimal performance. However, in regard to HSI classification, a large number of labeled samples is generally difficult to acquire due to the difficulty and time-consuming nature of manual labeling. Therefore, many research works focus on building a deep learning model for HSI classification with few labeled samples. 
In this article, we concentrate on this topic and provide a systematic review of the relevant literature. Specifically, the contributions of this paper are twofold. First, the research progress of related methods is categorized according to the learning paradigm, including transfer learning, active learning and few-shot learning. Second, a number of experiments with various state-of-the-art approaches have been carried out, and the results are summarized to reveal the potential research directions. More importantly, it is notable that although there is a vast gap between deep learning models (that usually need sufficient labeled samples) and the HSI scenario with few labeled samples, the issues of small-sample sets can be well handled by the fusion of deep learning methods and related techniques, such as transfer learning and lightweight models. For reproducibility, the source codes of the methods assessed in the paper can be found at https://github.com/ShuGuoJ/HSI-Classification.git. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2112.01800v1-abstract-full').style.display = 'none'; document.getElementById('2112.01800v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 3 December, 2021; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> December 2021. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Journal ref:</span> Neurocomputing, Volume 448, 2021, Pages 179-204 </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2108.05547">arXiv:2108.05547</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2108.05547">pdf</a>, <a href="https://arxiv.org/format/2108.05547">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Image and Video Processing">eess.IV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> Deep Amended Gradient Descent for Efficient Spectral Reconstruction from Single RGB Images </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&amp;query=Zhu%2C+Z">Zhiyu Zhu</a>, <a href="/search/eess?searchtype=author&amp;query=Liu%2C+H">Hui Liu</a>, <a href="/search/eess?searchtype=author&amp;query=Hou%2C+J">Junhui Hou</a>, <a href="/search/eess?searchtype=author&amp;query=Jia%2C+S">Sen Jia</a>, <a href="/search/eess?searchtype=author&amp;query=Zhang%2C+Q">Qingfu Zhang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2108.05547v1-abstract-short" style="display: inline;"> This paper investigates the problem of recovering hyperspectral (HS) images from single RGB images. To tackle such a severely ill-posed problem, we propose a physically-interpretable, compact, efficient, and end-to-end learning-based framework, namely AGD-Net.
Precisely, by taking advantage of the imaging process, we first formulate the problem explicitly based on the classic gradient descent algo&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2108.05547v1-abstract-full').style.display = 'inline'; document.getElementById('2108.05547v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2108.05547v1-abstract-full" style="display: none;"> This paper investigates the problem of recovering hyperspectral (HS) images from single RGB images. To tackle such a severely ill-posed problem, we propose a physically-interpretable, compact, efficient, and end-to-end learning-based framework, namely AGD-Net. Precisely, by taking advantage of the imaging process, we first formulate the problem explicitly based on the classic gradient descent algorithm. Then, we design a lightweight neural network with a multi-stage architecture to mimic the formed amended gradient descent process, in which efficient convolution and novel spectral zero-mean normalization are proposed to effectively extract spatial-spectral features for regressing an initialization, a basic gradient, and an incremental gradient. Besides, based on the approximate low-rank property of HS images, we propose a novel rank loss to promote the similarity between the global structures of reconstructed and ground-truth HS images, which is optimized with our singular value weighting strategy during training. Moreover, AGD-Net, a single network after one-time training, is flexible to handle the reconstruction with various spectral response functions. Extensive experiments over three commonly-used benchmark datasets demonstrate that AGD-Net can improve the reconstruction quality by more than 1.0 dB on average while saving 67$\times$ parameters and 32$\times$ FLOPs, compared with state-of-the-art methods. The code will be publicly available at https://github.com/zbzhzhy/GD-Net. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2108.05547v1-abstract-full').style.display = 'none'; document.getElementById('2108.05547v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 12 August, 2021; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> August 2021. 
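<p class="is-size-7">A single amended gradient-descent stage of the kind the abstract above describes might look as follows; Phi, eta and increment_net are assumed placeholders for the spectral response, the step size and the learned increment (a sketch, not the released AGD-Net code):</p> <pre><code class="language-python">
import numpy as np

def agd_stage(x, y, Phi, eta, increment_net):
    # One unrolled stage: x is the current hyperspectral estimate
    # (bands, pixels), y the RGB observation (3, pixels), Phi the (3, bands)
    # spectral response, increment_net a stand-in for the learned increment.
    basic_grad = Phi.T @ (Phi @ x - y)   # gradient of 0.5 * ||Phi @ x - y||^2
    return x - eta * (basic_grad + increment_net(x))
</code></pre>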
</p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2107.06536">arXiv:2107.06536</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2107.06536">pdf</a>, <a href="https://arxiv.org/ps/2107.06536">ps</a>, <a href="https://arxiv.org/format/2107.06536">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Image and Video Processing">eess.IV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> Multi-Attention Generative Adversarial Network for Remote Sensing Image Super-Resolution </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&amp;query=Xu%2C+M">Meng Xu</a>, <a href="/search/eess?searchtype=author&amp;query=Wang%2C+Z">Zhihao Wang</a>, <a href="/search/eess?searchtype=author&amp;query=Zhu%2C+J">Jiasong Zhu</a>, <a href="/search/eess?searchtype=author&amp;query=Jia%2C+X">Xiuping Jia</a>, <a href="/search/eess?searchtype=author&amp;query=Jia%2C+S">Sen Jia</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2107.06536v1-abstract-short" style="display: inline;"> Image super-resolution (SR) methods can generate remote sensing images with high spatial resolution without increasing the cost, thereby providing a feasible way to acquire high-resolution remote sensing images, which are difficult to obtain due to the high cost of acquisition equipment and complex weather. Clearly, image super-resolution is a severe ill-posed problem. Fortunately, with the develo&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2107.06536v1-abstract-full').style.display = 'inline'; document.getElementById('2107.06536v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2107.06536v1-abstract-full" style="display: none;"> Image super-resolution (SR) methods can generate remote sensing images with high spatial resolution without increasing the cost, thereby providing a feasible way to acquire high-resolution remote sensing images, which are difficult to obtain due to the high cost of acquisition equipment and complex weather. Clearly, image super-resolution is a severe ill-posed problem. Fortunately, with the development of deep learning, the powerful fitting ability of deep neural networks has solved this problem to some extent. In this paper, we propose a network based on the generative adversarial network (GAN) to generate high resolution remote sensing images, named the multi-attention generative adversarial network (MA-GAN). We first designed a GAN-based framework for the image SR task. The core to accomplishing the SR task is the image generator with post-upsampling that we designed. The main body of the generator contains two blocks; one is the pyramidal convolution in the residual-dense block (PCRDB), and the other is the attention-based upsample (AUP) block. The attentioned pyramidal convolution (AttPConv) in the PCRDB block is a module that combines multi-scale convolution and channel attention to automatically learn and adjust the scaling of the residuals for better results. 
The AUP block is a module that combines pixel attention (PA) to perform arbitrary multiples of upsampling. These two blocks work together to help generate better quality images. For the loss function, we design a loss function based on pixel loss and introduce both adversarial loss and feature loss to guide the generator learning. We have compared our method with several state-of-the-art methods on a remote sensing scene image dataset, and the experimental results consistently demonstrate the effectiveness of the proposed MA-GAN. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2107.06536v1-abstract-full').style.display = 'none'; document.getElementById('2107.06536v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 14 July, 2021; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> July 2021. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2106.09616">arXiv:2106.09616</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2106.09616">pdf</a>, <a href="https://arxiv.org/ps/2106.09616">ps</a>, <a href="https://arxiv.org/format/2106.09616">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Signal Processing">eess.SP</span> </div> </div> <p class="title is-5 mathjax"> Deep Reinforcement Learning Based Optimization for IRS Based UAV-NOMA Downlink Networks </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&amp;query=Jiao%2C+S">Shiyu Jiao</a>, <a href="/search/eess?searchtype=author&amp;query=Xie%2C+X">Ximing Xie</a>, <a href="/search/eess?searchtype=author&amp;query=Ding%2C+Z">Zhiguo Ding</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2106.09616v1-abstract-short" style="display: inline;"> This paper investigates the application of deep deterministic policy gradient (DDPG) to intelligent reflecting surface (IRS) based unmanned aerial vehicles (UAV) assisted non-orthogonal multiple access (NOMA) downlink networks. The deployment of the UAV equipped with an IRS is important, as the UAV increases the flexibility of the IRS significantly, especially for the case of users who have no lin&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2106.09616v1-abstract-full').style.display = 'inline'; document.getElementById('2106.09616v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2106.09616v1-abstract-full" style="display: none;"> This paper investigates the application of deep deterministic policy gradient (DDPG) to intelligent reflecting surface (IRS) based unmanned aerial vehicles (UAV) assisted non-orthogonal multiple access (NOMA) downlink networks. The deployment of the UAV equipped with an IRS is important, as the UAV increases the flexibility of the IRS significantly, especially for the case of users who have no line of sight (LoS) path to the base station (BS). Therefore, the aim of this letter is to maximize the sum rate by jointly optimizing the power allocation of the BS, the phase shifting of the IRS and the horizontal position of the UAV. 
Because the formulated problem is not convex, the DDPG algorithm is utilized to solve it. The computer simulation results are provided to show the superior performance of the proposed DDPG-based algorithm. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2106.09616v1-abstract-full').style.display = 'none'; document.getElementById('2106.09616v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 17 June, 2021; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> June 2021. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2106.09611">arXiv:2106.09611</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2106.09611">pdf</a>, <a href="https://arxiv.org/format/2106.09611">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Signal Processing">eess.SP</span> </div> </div> <p class="title is-5 mathjax"> A Reinforcement Learning Approach for an IRS-assisted NOMA Network </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&amp;query=Xie%2C+X">Ximing Xie</a>, <a href="/search/eess?searchtype=author&amp;query=Jiao%2C+S">Shiyu Jiao</a>, <a href="/search/eess?searchtype=author&amp;query=Ding%2C+Z">Zhiguo Ding</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2106.09611v1-abstract-short" style="display: inline;"> This letter investigates a sum rate maximization problem in an intelligent reflective surface (IRS) assisted non-orthogonal multiple access (NOMA) downlink network. Specifically, the sum rate of all the users is maximized by jointly optimizing the beams at the base station and the phase shift at the IRS. Deep reinforcement learning (DRL), which has achieved massive success, is applied to solve&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2106.09611v1-abstract-full').style.display = 'inline'; document.getElementById('2106.09611v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2106.09611v1-abstract-full" style="display: none;"> This letter investigates a sum rate maximization problem in an intelligent reflective surface (IRS) assisted non-orthogonal multiple access (NOMA) downlink network. Specifically, the sum rate of all the users is maximized by jointly optimizing the beams at the base station and the phase shift at the IRS. Deep reinforcement learning (DRL), which has achieved massive success, is applied to solve this sum rate maximization problem. In particular, an algorithm based on the deep deterministic policy gradient (DDPG) is proposed. Both the random channel case and the fixed channel case are studied in this letter. The simulation result illustrates that the DDPG-based algorithm has competitive performance in both cases.
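<p class="is-size-7">To make the optimization variables above concrete, a toy computation of the effective channel and a single-user rate under the standard IRS signal model; all symbols are illustrative assumptions and NOMA interference is omitted:</p> <pre><code class="language-python">
import numpy as np

def effective_channel(h_d, G, h_r, theta):
    # h_d: direct BS-user channel (M,); G: BS-to-IRS channel (N, M);
    # h_r: IRS-to-user channel (N,); theta: the N adjustable phase shifts.
    return h_d + G.conj().T @ (np.exp(1j * theta) * h_r)

def user_rate(h_eff, w, noise_power=1.0):
    # Achievable rate for beamformer w; interference terms are omitted
    # to keep the sketch short.
    return np.log2(1.0 + np.abs(np.vdot(h_eff, w)) ** 2 / noise_power)
</code></pre>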
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2106.09611v1-abstract-full').style.display = 'none'; document.getElementById('2106.09611v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 17 June, 2021; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> June 2021. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2106.06907">arXiv:2106.06907</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2106.06907">pdf</a>, <a href="https://arxiv.org/format/2106.06907">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Human-Computer Interaction">cs.HC</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Systems and Control">eess.SY</span> </div> <div class="is-inline-block" style="margin-left: 0.5rem"> <div class="tags has-addons"> <span class="tag is-dark is-size-7">doi</span> <span class="tag is-light is-size-7"><a class="" href="https://doi.org/10.1109/TIFS.2022.3189530">10.1109/TIFS.2022.3189530 <i class="fa fa-external-link" aria-hidden="true"></i></a></span> </div> </div> </div> <p class="title is-5 mathjax"> ADVERT: An Adaptive and Data-Driven Attention Enhancement Mechanism for Phishing Prevention </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&amp;query=Huang%2C+L">Linan Huang</a>, <a href="/search/eess?searchtype=author&amp;query=Jia%2C+S">Shumeng Jia</a>, <a href="/search/eess?searchtype=author&amp;query=Balcetis%2C+E">Emily Balcetis</a>, <a href="/search/eess?searchtype=author&amp;query=Zhu%2C+Q">Quanyan Zhu</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2106.06907v3-abstract-short" style="display: inline;"> Attacks exploiting the innate and the acquired vulnerabilities of human users have posed severe threats to cybersecurity. This work proposes ADVERT, a human-technical solution that generates adaptive visual aids in real-time to prevent users from inadvertence and reduce their susceptibility to phishing attacks. Based on the eye-tracking data, we extract visual states and attention states as system&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2106.06907v3-abstract-full').style.display = 'inline'; document.getElementById('2106.06907v3-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2106.06907v3-abstract-full" style="display: none;"> Attacks exploiting the innate and the acquired vulnerabilities of human users have posed severe threats to cybersecurity. This work proposes ADVERT, a human-technical solution that generates adaptive visual aids in real-time to prevent users from inadvertence and reduce their susceptibility to phishing attacks. Based on the eye-tracking data, we extract visual states and attention states as system-level sufficient statistics to characterize the user&#39;s visual behaviors and attention status. 
By adopting a data-driven approach and two learning feedback loops of different time scales, this work lays out a theoretical foundation to analyze, evaluate, and particularly modify humans&#39; attention processes while they vet and recognize phishing emails. We corroborate the effectiveness, efficiency, and robustness of ADVERT through a case study based on the data set collected from human subject experiments conducted at New York University. The results show that the visual aids can statistically increase the attention level and improve the accuracy of phishing recognition from 74.6% to a minimum of 86%. The meta-adaptation can further improve the accuracy to 91.5% (resp. 93.7%) in less than 3 (resp. 50) tuning stages. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2106.06907v3-abstract-full').style.display = 'none'; document.getElementById('2106.06907v3-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 8 July, 2022; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 12 June, 2021; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> June 2021. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2104.05901">arXiv:2104.05901</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2104.05901">pdf</a>, <a href="https://arxiv.org/format/2104.05901">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Image and Video Processing">eess.IV</span> </div> </div> <p class="title is-5 mathjax"> SRR-Net: A Super-Resolution-Involved Reconstruction Method for High Resolution MR Imaging </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&amp;query=Huang%2C+W">Wenqi Huang</a>, <a href="/search/eess?searchtype=author&amp;query=Jia%2C+S">Sen Jia</a>, <a href="/search/eess?searchtype=author&amp;query=Ke%2C+Z">Ziwen Ke</a>, <a href="/search/eess?searchtype=author&amp;query=Cui%2C+Z">Zhuo-Xu Cui</a>, <a href="/search/eess?searchtype=author&amp;query=Cheng%2C+J">Jing Cheng</a>, <a href="/search/eess?searchtype=author&amp;query=Zhu%2C+Y">Yanjie Zhu</a>, <a href="/search/eess?searchtype=author&amp;query=Liang%2C+D">Dong Liang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2104.05901v1-abstract-short" style="display: inline;"> Improving the image resolution and acquisition speed of magnetic resonance imaging (MRI) is a challenging problem. There are mainly two strategies dealing with the speed-resolution trade-off: (1) $k$-space undersampling with high-resolution acquisition, and (2) a pipeline of lower resolution image reconstruction and image super-resolution.
However, these approaches either have limited performance&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2104.05901v1-abstract-full').style.display = 'inline'; document.getElementById('2104.05901v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2104.05901v1-abstract-full" style="display: none;"> Improving the image resolution and acquisition speed of magnetic resonance imaging (MRI) is a challenging problem. There are mainly two strategies dealing with the speed-resolution trade-off: (1) $k$-space undersampling with high-resolution acquisition, and (2) a pipeline of lower resolution image reconstruction and image super-resolution. However, these approaches either have limited performance at certain high acceleration factors or suffer from the error accumulation of the two-step structure. In this paper, we combine the ideas of MR reconstruction and image super-resolution, and work on recovering HR images from low-resolution under-sampled $k$-space data directly. Particularly, the SR-involved reconstruction can be formulated as a variational problem, and a learnable network unrolled from its solution algorithm is proposed. A discriminator was introduced to enhance the detail-refining performance. Experimental results using in-vivo HR multi-coil brain data indicate that the proposed SRR-Net is capable of recovering high-resolution brain images with both good visual quality and perceptual quality. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2104.05901v1-abstract-full').style.display = 'none'; document.getElementById('2104.05901v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 12 April, 2021; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> April 2021.
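<p class="is-size-7">Unrolled reconstruction networks of this kind typically interleave learned refinement with a k-space data-consistency step; a minimal sketch of that step under a Cartesian sampling assumption (not the authors' exact operator):</p> <pre><code class="language-python">
import numpy as np

def data_consistency(x, y, mask):
    # x: current image estimate; y: measured undersampled k-space;
    # mask: boolean sampling pattern. Measured locations are restored
    # exactly; unmeasured ones keep the network's prediction.
    k = np.fft.fft2(x)
    return np.fft.ifft2(np.where(mask, y, k))
</code></pre>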
</p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2104.01102">arXiv:2104.01102</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2104.01102">pdf</a>, <a href="https://arxiv.org/format/2104.01102">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Image and Video Processing">eess.IV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> Deep Manifold Learning for Dynamic MR Imaging </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&amp;query=Ke%2C+Z">Ziwen Ke</a>, <a href="/search/eess?searchtype=author&amp;query=Cui%2C+Z">Zhuo-Xu Cui</a>, <a href="/search/eess?searchtype=author&amp;query=Huang%2C+W">Wenqi Huang</a>, <a href="/search/eess?searchtype=author&amp;query=Cheng%2C+J">Jing Cheng</a>, <a href="/search/eess?searchtype=author&amp;query=Jia%2C+S">Sen Jia</a>, <a href="/search/eess?searchtype=author&amp;query=Wang%2C+H">Haifeng Wang</a>, <a href="/search/eess?searchtype=author&amp;query=Liu%2C+X">Xin Liu</a>, <a href="/search/eess?searchtype=author&amp;query=Zheng%2C+H">Hairong Zheng</a>, <a href="/search/eess?searchtype=author&amp;query=Ying%2C+L">Leslie Ying</a>, <a href="/search/eess?searchtype=author&amp;query=Zhu%2C+Y">Yanjie Zhu</a>, <a href="/search/eess?searchtype=author&amp;query=Liang%2C+D">Dong Liang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2104.01102v1-abstract-short" style="display: inline;"> Purpose: To develop a deep learning method on a nonlinear manifold to explore the temporal redundancy of dynamic signals to reconstruct cardiac MRI data from highly undersampled measurements. Methods: Cardiac MR image reconstruction is modeled as general compressed sensing (CS) based optimization on a low-rank tensor manifold. The nonlinear manifold is designed to characterize the temporal corre&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2104.01102v1-abstract-full').style.display = 'inline'; document.getElementById('2104.01102v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2104.01102v1-abstract-full" style="display: none;"> Purpose: To develop a deep learning method on a nonlinear manifold to explore the temporal redundancy of dynamic signals to reconstruct cardiac MRI data from highly undersampled measurements. Methods: Cardiac MR image reconstruction is modeled as general compressed sensing (CS) based optimization on a low-rank tensor manifold. The nonlinear manifold is designed to characterize the temporal correlation of dynamic signals. Iterative procedures can be obtained by solving the optimization model on the manifold, including gradient calculation, projection of the gradient to tangent space, and retraction of the tangent space to the manifold. The iterative procedures on the manifold are unrolled to a neural network, dubbed as Manifold-Net. The Manifold-Net is trained using in vivo data with a retrospective electrocardiogram (ECG)-gated segmented bSSFP sequence. 
Results: Experimental results at high accelerations demonstrate that the proposed method can obtain improved reconstruction compared with a compressed sensing (CS) method k-t SLR and two state-of-the-art deep learning-based methods, DC-CNN and CRNN. Conclusion: This work represents the first study unrolling the optimization on manifolds into neural networks. Specifically, the designed low-rank manifold provides a new technical route for applying low-rank priors in dynamic MR imaging. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2104.01102v1-abstract-full').style.display = 'none'; document.getElementById('2104.01102v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 8 March, 2021; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> April 2021. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">17 pages, 7 figures</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2010.13677">arXiv:2010.13677</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2010.13677">pdf</a>, <a href="https://arxiv.org/format/2010.13677">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Image and Video Processing">eess.IV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Signal Processing">eess.SP</span> </div> </div> <p class="title is-5 mathjax"> Deep Low-rank plus Sparse Network for Dynamic MR Imaging </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&amp;query=Huang%2C+W">Wenqi Huang</a>, <a href="/search/eess?searchtype=author&amp;query=Ke%2C+Z">Ziwen Ke</a>, <a href="/search/eess?searchtype=author&amp;query=Cui%2C+Z">Zhuo-Xu Cui</a>, <a href="/search/eess?searchtype=author&amp;query=Cheng%2C+J">Jing Cheng</a>, <a href="/search/eess?searchtype=author&amp;query=Qiu%2C+Z">Zhilang Qiu</a>, <a href="/search/eess?searchtype=author&amp;query=Jia%2C+S">Sen Jia</a>, <a href="/search/eess?searchtype=author&amp;query=Ying%2C+L">Leslie Ying</a>, <a href="/search/eess?searchtype=author&amp;query=Zhu%2C+Y">Yanjie Zhu</a>, <a href="/search/eess?searchtype=author&amp;query=Liang%2C+D">Dong Liang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2010.13677v3-abstract-short" style="display: inline;"> In dynamic magnetic resonance (MR) imaging, low-rank plus sparse (L+S) decomposition, or robust principal component analysis (PCA), has achieved stunning performance. However, the selection of the parameters of L+S is empirical, and the acceleration rate is limited, which are common failings of iterative compressed sensing MR imaging (CS-MRI) reconstruction methods. 
<p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 20 July, 2021; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 26 October, 2020; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2020.</p>
</li>
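<p>The "learned soft singular-value thresholding" in L+S-Net has a simple classical counterpart. The sketch below alternates singular-value soft-thresholding (the low-rank proximal step) and entrywise soft-thresholding (the sparse proximal step) to split a matrix into L and S components; in the network these thresholds would be learnable parameters, whereas here they are fixed constants chosen for the demo.</p>
<pre><code># Classical L+S splitting with fixed thresholds (L+S-Net learns these instead).
import numpy as np

def soft(x, tau):
    """Entrywise soft-thresholding: the proximal operator of the l1 norm."""
    return np.sign(x) * np.maximum(np.abs(x) - tau, 0.0)

def svt(X, tau):
    """Singular-value soft-thresholding: the proximal operator of the nuclear norm."""
    U, s, Vt = np.linalg.svd(X, full_matrices=False)
    return (U * soft(s, tau)) @ Vt

rng = np.random.default_rng(1)
M = rng.standard_normal((64, 20))      # toy Casorati matrix (space x time)
L = np.zeros_like(M)
S = np.zeros_like(M)
tau_L, tau_S = 1.0, 0.5                # fixed here; learnable in L+S-Net
for _ in range(50):
    L = svt(M - S, tau_L)              # low-rank update
    S = soft(M - L, tau_S)             # sparse update
</code></pre>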
<li class="arxiv-result">
<div class="is-marginless">
<p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2007.13484">arXiv:2007.13484</a> [<a href="https://arxiv.org/pdf/2007.13484">pdf</a>, <a href="https://arxiv.org/format/2007.13484">other</a>]</p>
<div class="tags is-inline-block"><span class="tag" data-tooltip="Signal Processing">eess.SP</span> <span class="tag" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> <span class="tag" data-tooltip="Machine Learning">cs.LG</span> <span class="tag" data-tooltip="Image and Video Processing">eess.IV</span></div>
</div>
<p class="title is-5 mathjax">Attention-based Graph ResNet for Motor Intent Detection from Raw EEG signals</p>
<p class="authors"><span class="search-hit">Authors:</span> Shuyue Jia, Yimin Hou, Yan Shi, Yang Li</p>
<p class="abstract mathjax"><span class="has-text-black-bis has-text-weight-semibold">Abstract:</span> Previous studies on decoding electroencephalography (EEG) signals have not considered the topological relationship of EEG electrodes. However, recent neuroscience research has suggested that brain activity is organized in networks of connected regions, so the interaction between EEG channels may not be appropriately measured by Euclidean distance. To fill this gap, an attention-based graph residual network, a novel structure of graph convolutional neural network (GCN), is presented to detect human motor intents from raw EEG signals, where the topological structure of the EEG electrodes is built as a graph. Meanwhile, deep residual learning with a full-attention architecture is introduced to address the degradation problem that affects deeper networks on raw EEG motor imagery (MI) data. Individual variability, a critical and longstanding challenge in EEG analysis, is handled with state-of-the-art performance: 98.08% accuracy at the subject level and 94.28% across 20 subjects. The numerical results suggest that the graph-structured topology is superior for decoding raw EEG data. This deep learning approach is expected to provide a general method for both neuroscience research and real-world EEG-based applications, e.g., seizure prediction.</p>
<p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 25 June, 2020; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> July 2020.</p>
</li>
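<p>A generic attention-gated residual block conveys the two ingredients named in this abstract, residual learning against the degradation problem and an attention mechanism, though the paper's actual graph-based architecture differs. A minimal PyTorch sketch; the channel sizes and the sigmoid gating are assumptions:</p>
<pre><code># Generic attention-gated residual block (illustrative; not the paper's exact model).
import torch
import torch.nn as nn

class AttentionResidualBlock(nn.Module):
    def __init__(self, channels):
        super().__init__()
        self.conv1 = nn.Conv1d(channels, channels, kernel_size=3, padding=1)
        self.conv2 = nn.Conv1d(channels, channels, kernel_size=3, padding=1)
        self.gate = nn.Sequential(nn.Conv1d(channels, channels, kernel_size=1), nn.Sigmoid())
        self.act = nn.ReLU()

    def forward(self, x):
        h = self.act(self.conv1(x))
        h = self.conv2(h)
        h = h * self.gate(h)           # attention: reweight feature maps
        return self.act(x + h)         # residual shortcut eases training of deep stacks

out = AttentionResidualBlock(16)(torch.randn(2, 16, 128))  # (batch, channels, time)
</code></pre>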
<li class="arxiv-result">
<div class="is-marginless">
<p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2006.12090">arXiv:2006.12090</a> [<a href="https://arxiv.org/pdf/2006.12090">pdf</a>, <a href="https://arxiv.org/format/2006.12090">other</a>]</p>
<div class="tags is-inline-block"><span class="tag" data-tooltip="Image and Video Processing">eess.IV</span> <span class="tag" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span></div>
</div>
<p class="title is-5 mathjax">Deep Low-rank Prior in Dynamic MR Imaging</p>
<p class="authors"><span class="search-hit">Authors:</span> Ziwen Ke, Wenqi Huang, Jing Cheng, Zhuoxu Cui, Sen Jia, Haifeng Wang, Xin Liu, Hairong Zheng, Leslie Ying, Yanjie Zhu, Dong Liang</p>
<p class="abstract mathjax"><span class="has-text-black-bis has-text-weight-semibold">Abstract:</span> Deep learning methods have achieved attractive performance in dynamic MR cine imaging. However, these methods are driven only by the sparse prior of MR images; the important low-rank (LR) prior of dynamic MR cine images is not explored, which limits further improvement of dynamic MR reconstruction. In this paper, a learned singular value thresholding (Learned-SVT) operation is proposed to exploit a deep low-rank prior in dynamic MR imaging and obtain improved reconstruction results. In particular, we propose two distinct schemes to introduce the learnable low-rank prior into deep network architectures: an unrolling scheme and a plug-and-play scheme. In the unrolling scheme, we put forward a model-based unrolled sparse and low-rank network for dynamic MR imaging, dubbed SLR-Net. SLR-Net is defined over a deep network flow graph, unrolled from the iterative procedure of the iterative shrinkage-thresholding algorithm (ISTA) for optimizing a sparse and low-rank based dynamic MRI model. In the plug-and-play scheme, we present a plug-and-play LR network module that can be easily embedded into any other dynamic MR neural network without changing the network paradigm. Experimental results show that both schemes improve on state-of-the-art CS methods, such as k-t SLR, and on sparsity-driven deep learning-based methods, such as DC-CNN and CRNN, both qualitatively and quantitatively.</p>
<p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 28 July, 2020; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 22 June, 2020; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> June 2020.</p>
<p class="comments is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Comments:</span> 10 pages, 8 figures</p>
</li>
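<p>The ISTA procedure that SLR-Net unrolls is standard and easy to state. Below is the plain sparse-recovery version with a fixed step and threshold on toy data; SLR-Net additionally carries a low-rank term and makes the step sizes and thresholds learnable:</p>
<pre><code># Classical ISTA loop for min_x 0.5*||Ax - y||^2 + lam*||x||_1 (toy data).
import numpy as np

def soft(x, tau):
    return np.sign(x) * np.maximum(np.abs(x) - tau, 0.0)

rng = np.random.default_rng(2)
A = rng.standard_normal((40, 100))
x_true = np.zeros(100)
x_true[:5] = 3.0                           # sparse ground truth
y = A @ x_true

alpha = 1.0 / np.linalg.norm(A, 2) ** 2    # step size 1/L (L = Lipschitz constant)
lam = 0.1
x = np.zeros(100)
for _ in range(300):
    x = soft(x - alpha * (A.T @ (A @ x - y)), alpha * lam)   # gradient step + shrinkage
</code></pre>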
<li class="arxiv-result">
<div class="is-marginless">
<p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2006.10320">arXiv:2006.10320</a> [<a href="https://arxiv.org/pdf/2006.10320">pdf</a>, <a href="https://arxiv.org/format/2006.10320">other</a>]</p>
<div class="tags is-inline-block"><span class="tag" data-tooltip="Information Theory">cs.IT</span> <span class="tag" data-tooltip="Signal Processing">eess.SP</span></div>
</div>
<p class="title is-5 mathjax">Reconfigurable Intelligent Surfaces for Energy Efficiency in D2D Communication Network</p>
<p class="authors"><span class="search-hit">Authors:</span> Shuaiqi Jia, Xiaojun Yuan, Ying-Chang Liang</p>
<p class="abstract mathjax"><span class="has-text-black-bis has-text-weight-semibold">Abstract:</span> In this letter, the joint power control of D2D users and passive beamforming at a reconfigurable intelligent surface (RIS) is investigated for an RIS-aided device-to-device (D2D) communication network, with the aim of maximizing energy efficiency. This non-convex optimization problem is divided into two subproblems, passive beamforming and power control, which are optimized alternately. We first decouple the passive beamforming at the RIS based on the Lagrangian dual transform and solve it using fractional programming. Then we optimize the power control using the Dinkelbach method. By iteratively solving the two subproblems, we obtain a suboptimal solution to the joint optimization problem. Numerical results verify the effectiveness of the proposed algorithm, which can significantly improve the energy efficiency of the D2D network.</p>
<p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 18 June, 2020; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> June 2020.</p>
<p class="comments is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Comments:</span> 4 pages, 4 figures</p>
</li>
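<p>The Dinkelbach method mentioned here turns a ratio maximization (energy efficiency = rate / power) into a sequence of simpler parameterized problems. A scalar toy version for a single link; the channel gain, circuit power, and power budget are made-up numbers, and the letter's actual problem also involves RIS beamforming:</p>
<pre><code># Dinkelbach iterations for max_p f(p)/g(p), f = rate, g = total power (toy scalar case).
import numpy as np

h, Pc, Pmax = 2.0, 0.1, 1.0            # assumed channel gain, circuit power, power budget
f = lambda p: np.log2(1.0 + h * p)     # achievable rate
g = lambda p: p + Pc                   # total power consumption

p = Pmax
lam = f(p) / g(p)                      # initial energy-efficiency estimate
for _ in range(30):
    # inner problem: argmax_p f(p) - lam*g(p); concave in p, so solve in closed form
    p = np.clip(1.0 / (lam * np.log(2)) - 1.0 / h, 0.0, Pmax)
    lam = f(p) / g(p)                  # Dinkelbach parameter update

print("EE-optimal power:", p, "energy efficiency:", lam)
</code></pre>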
<li class="arxiv-result">
<div class="is-marginless">
<p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2006.08924">arXiv:2006.08924</a> [<a href="https://arxiv.org/pdf/2006.08924">pdf</a>, <a href="https://arxiv.org/format/2006.08924">other</a>]</p>
<div class="tags is-inline-block"><span class="tag" data-tooltip="Signal Processing">eess.SP</span> <span class="tag" data-tooltip="Machine Learning">cs.LG</span> <span class="tag" data-tooltip="Neural and Evolutionary Computing">cs.NE</span></div>
<div class="tags has-addons"><span class="tag">doi</span><span class="tag"><a href="https://doi.org/10.1109/TNNLS.2022.3202569">10.1109/TNNLS.2022.3202569</a></span></div>
</div>
<p class="title is-5 mathjax">GCNs-Net: A Graph Convolutional Neural Network Approach for Decoding Time-resolved EEG Motor Imagery Signals</p>
<p class="authors"><span class="search-hit">Authors:</span> Yimin Hou, Shuyue Jia, Xiangmin Lun, Ziqian Hao, Yan Shi, Yang Li, Rui Zeng, Jinglei Lv</p>
href="/search/eess?searchtype=author&amp;query=Li%2C+Y">Yang Li</a>, <a href="/search/eess?searchtype=author&amp;query=Zeng%2C+R">Rui Zeng</a>, <a href="/search/eess?searchtype=author&amp;query=Lv%2C+J">Jinglei Lv</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2006.08924v4-abstract-short" style="display: inline;"> Towards developing effective and efficient brain-computer interface (BCI) systems, precise decoding of brain activity measured by electroencephalogram (EEG), is highly demanded. Traditional works classify EEG signals without considering the topological relationship among electrodes. However, neuroscience research has increasingly emphasized network patterns of brain dynamics. Thus, the Euclidean s&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2006.08924v4-abstract-full').style.display = 'inline'; document.getElementById('2006.08924v4-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2006.08924v4-abstract-full" style="display: none;"> Towards developing effective and efficient brain-computer interface (BCI) systems, precise decoding of brain activity measured by electroencephalogram (EEG), is highly demanded. Traditional works classify EEG signals without considering the topological relationship among electrodes. However, neuroscience research has increasingly emphasized network patterns of brain dynamics. Thus, the Euclidean structure of electrodes might not adequately reflect the interaction between signals. To fill the gap, a novel deep learning framework based on the graph convolutional neural networks (GCNs) is presented to enhance the decoding performance of raw EEG signals during different types of motor imagery (MI) tasks while cooperating with the functional topological relationship of electrodes. Based on the absolute Pearson&#39;s matrix of overall signals, the graph Laplacian of EEG electrodes is built up. The GCNs-Net constructed by graph convolutional layers learns the generalized features. The followed pooling layers reduce dimensionality, and the fully-connected softmax layer derives the final prediction. The introduced approach has been shown to converge for both personalized and group-wise predictions. It has achieved the highest averaged accuracy, 93.06% and 88.57% (PhysioNet Dataset), 96.24% and 80.89% (High Gamma Dataset), at the subject and group level, respectively, compared with existing studies, which suggests adaptability and robustness to individual variability. Moreover, the performance is stably reproducible among repetitive experiments for cross-validation. The excellent performance of our method has shown that it is an important step towards better BCI approaches. To conclude, the GCNs-Net filters EEG signals based on the functional topological relationship, which manages to decode relevant features for brain motor imagery. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2006.08924v4-abstract-full').style.display = 'none'; document.getElementById('2006.08924v4-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 26 August, 2022; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 16 June, 2020; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> June 2020. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2005.06514">arXiv:2005.06514</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2005.06514">pdf</a>, <a href="https://arxiv.org/format/2005.06514">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Image and Video Processing">eess.IV</span> </div> </div> <p class="title is-5 mathjax"> 3D Face Anti-spoofing with Factorized Bilinear Coding </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&amp;query=Jia%2C+S">Shan Jia</a>, <a href="/search/eess?searchtype=author&amp;query=Li%2C+X">Xin Li</a>, <a href="/search/eess?searchtype=author&amp;query=Hu%2C+C">Chuanbo Hu</a>, <a href="/search/eess?searchtype=author&amp;query=Guo%2C+G">Guodong Guo</a>, <a href="/search/eess?searchtype=author&amp;query=Xu%2C+Z">Zhengquan Xu</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2005.06514v3-abstract-short" style="display: inline;"> We have witnessed rapid advances in both face presentation attack models and presentation attack detection (PAD) in recent years. When compared with widely studied 2D face presentation attacks, 3D face spoofing attacks are more challenging because face recognition systems are more easily confused by the 3D characteristics of materials similar to real faces. In this work, we tackle the problem of d&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2005.06514v3-abstract-full').style.display = 'inline'; document.getElementById('2005.06514v3-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2005.06514v3-abstract-full" style="display: none;"> We have witnessed rapid advances in both face presentation attack models and presentation attack detection (PAD) in recent years. When compared with widely studied 2D face presentation attacks, 3D face spoofing attacks are more challenging because face recognition systems are more easily confused by the 3D characteristics of materials similar to real faces. In this work, we tackle the problem of detecting these realistic 3D face presentation attacks, and propose a novel anti-spoofing method from the perspective of fine-grained classification. Our method, based on factorized bilinear coding of multiple color channels (namely MC\_FBC), targets at learning subtle fine-grained differences between real and fake images. By extracting discriminative and fusing complementary information from RGB and YCbCr spaces, we have developed a principled solution to 3D face spoofing detection. 
<li class="arxiv-result">
<div class="is-marginless">
<p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2005.06514">arXiv:2005.06514</a> [<a href="https://arxiv.org/pdf/2005.06514">pdf</a>, <a href="https://arxiv.org/format/2005.06514">other</a>]</p>
<div class="tags is-inline-block"><span class="tag" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> <span class="tag" data-tooltip="Image and Video Processing">eess.IV</span></div>
</div>
<p class="title is-5 mathjax">3D Face Anti-spoofing with Factorized Bilinear Coding</p>
<p class="authors"><span class="search-hit">Authors:</span> Shan Jia, Xin Li, Chuanbo Hu, Guodong Guo, Zhengquan Xu</p>
<p class="abstract mathjax"><span class="has-text-black-bis has-text-weight-semibold">Abstract:</span> We have witnessed rapid advances in both face presentation attack models and presentation attack detection (PAD) in recent years. Compared with widely studied 2D face presentation attacks, 3D face spoofing attacks are more challenging because face recognition systems are more easily confused by the 3D characteristics of materials similar to real faces. In this work, we tackle the problem of detecting these realistic 3D face presentation attacks and propose a novel anti-spoofing method from the perspective of fine-grained classification. Our method, based on factorized bilinear coding of multiple color channels (namely MC_FBC), aims to learn subtle fine-grained differences between real and fake images. By extracting discriminative information from the RGB and YCbCr spaces and fusing their complementary features, we have developed a principled solution to 3D face spoofing detection. A large-scale wax figure face database (WFFD) with both images and videos has also been collected as super-realistic attacks to facilitate the study of 3D face presentation attack detection. Extensive experimental results show that our proposed method achieves state-of-the-art performance on both our own WFFD and other face spoofing databases under various intra-database and inter-database testing scenarios.</p>
<p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 13 December, 2020; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 11 May, 2020; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> May 2020.</p>
<p class="comments is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Comments:</span> arXiv admin note: text overlap with arXiv:1910.05457</p>
</li>
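<p>Factorized bilinear coding, in the general sense, pairs two feature vectors through low-rank bilinear forms. The sketch below is only a generic factorized bilinear pooling of an "RGB" and a "YCbCr" feature vector; the paper's MC_FBC has its own coding formulation, and all dimensions here are invented:</p>
<pre><code># Generic factorized bilinear pooling of two color-space features (not exact MC_FBC).
import numpy as np

rng = np.random.default_rng(4)
x = rng.standard_normal(128)                 # toy feature from the RGB branch
y = rng.standard_normal(128)                 # toy feature from the YCbCr branch
d, r = 32, 8                                 # output dimension, factorization rank
U = rng.standard_normal((d, 128, r)) * 0.05  # low-rank factors (learned in practice)
V = rng.standard_normal((d, 128, r)) * 0.05

# z_k = x^T U_k V_k^T y: a rank-r bilinear interaction per output unit
z = np.array([(U[k].T @ x) @ (V[k].T @ y) for k in range(d)])
</code></pre>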
<li class="arxiv-result">
<div class="is-marginless">
<p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2005.00777">arXiv:2005.00777</a> [<a href="https://arxiv.org/pdf/2005.00777">pdf</a>, <a href="https://arxiv.org/format/2005.00777">other</a>]</p>
<div class="tags is-inline-block"><span class="tag" data-tooltip="Signal Processing">eess.SP</span> <span class="tag" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> <span class="tag" data-tooltip="Human-Computer Interaction">cs.HC</span> <span class="tag" data-tooltip="Machine Learning">cs.LG</span></div>
</div>
<p class="title is-5 mathjax">Deep Feature Mining via Attention-based BiLSTM-GCN for Human Motor Imagery Recognition</p>
<p class="authors"><span class="search-hit">Authors:</span> Yimin Hou, Shuyue Jia, Xiangmin Lun, Shu Zhang, Tao Chen, Fang Wang, Jinglei Lv</p>
<p class="abstract mathjax"><span class="has-text-black-bis has-text-weight-semibold">Abstract:</span> Recognition accuracy and response time are both critical prerequisites for building practical electroencephalography (EEG) based brain-computer interfaces (BCI). Recent approaches, however, have compromised either classification accuracy or response time. This paper presents a novel deep learning approach designed for accurate and responsive motor imagery (MI) recognition based on scalp EEG. Bidirectional long short-term memory (BiLSTM) with an attention mechanism derives relevant features from raw EEG signals. A connected graph convolutional neural network (GCN) promotes decoding performance by cooperating with the topological structure of the features, which is estimated from the overall data. The 0.4-second detection framework shows effective and efficient prediction based on individual and group-wise training, with 98.81% and 94.64% accuracy, respectively, outperforming state-of-the-art studies. The introduced deep feature mining approach can precisely recognize human motion intents from raw EEG signals, which paves the way for translating EEG-based MI recognition to practical BCI systems.</p>
<p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 2 December, 2021; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 2 May, 2020; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> May 2020.</p>
</li>
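<p>The "BiLSTM with attention" front end described here follows a common pattern: score each time step of the bidirectional recurrence, softmax the scores, and pool. A minimal PyTorch sketch with invented shapes; the paper's exact layer sizes and the downstream GCN are omitted:</p>
<pre><code># BiLSTM encoder with soft attention pooling over time (generic sketch).
import torch
import torch.nn as nn

class AttentiveBiLSTM(nn.Module):
    def __init__(self, in_dim, hidden):
        super().__init__()
        self.lstm = nn.LSTM(in_dim, hidden, batch_first=True, bidirectional=True)
        self.score = nn.Linear(2 * hidden, 1)        # one relevance score per time step

    def forward(self, x):                            # x: (batch, time, channels)
        h, _ = self.lstm(x)                          # (batch, time, 2*hidden)
        w = torch.softmax(self.score(h), dim=1)      # attention weights over time
        return (w * h).sum(dim=1)                    # attention-weighted summary

feats = AttentiveBiLSTM(64, 32)(torch.randn(8, 64, 64))  # toy batch of short EEG windows
</code></pre>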
<li class="arxiv-result">
<div class="is-marginless">
<p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2004.12314">arXiv:2004.12314</a> [<a href="https://arxiv.org/pdf/2004.12314">pdf</a>]</p>
<div class="tags is-inline-block"><span class="tag" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> <span class="tag" data-tooltip="Machine Learning">cs.LG</span> <span class="tag" data-tooltip="Image and Video Processing">eess.IV</span> <span class="tag" data-tooltip="Machine Learning">stat.ML</span></div>
</div>
<p class="title is-5 mathjax">A Global Benchmark of Algorithms for Segmenting Late Gadolinium-Enhanced Cardiac Magnetic Resonance Imaging</p>
<p class="authors"><span class="search-hit">Authors:</span> Zhaohan Xiong, Qing Xia, Zhiqiang Hu, Ning Huang, Cheng Bian, Yefeng Zheng, Sulaiman Vesal, Nishant Ravikumar, Andreas Maier, Xin Yang, Pheng-Ann Heng, Dong Ni, Caizi Li, Qianqian Tong, Weixin Si, Elodie Puybareau, Younes Khoudli, Thierry Geraud, Chen Chen, Wenjia Bai, Daniel Rueckert, Lingchao Xu, Xiahai Zhuang, Xinzhe Luo, Shuman Jia, et al. (19 additional authors not shown)</p>
<p class="abstract mathjax"><span class="has-text-black-bis has-text-weight-semibold">Abstract:</span> Segmentation of cardiac images, particularly late gadolinium-enhanced magnetic resonance imaging (LGE-MRI) widely used for visualizing diseased cardiac structures, is a crucial first step for clinical diagnosis and treatment. However, direct segmentation of LGE-MRIs is challenging due to their attenuated contrast. Since most clinical studies have relied on manual, labor-intensive approaches, automatic methods are of high interest, particularly optimized machine learning approaches. To address this, we organized the "2018 Left Atrium Segmentation Challenge" using 154 3D LGE-MRIs, currently the world's largest cardiac LGE-MRI dataset, with labels of the left atrium segmented by three medical experts, ultimately attracting the participation of 27 international teams. In this paper, extensive analysis of the submitted algorithms using technical and biological metrics was performed, including subgroup and hyper-parameter analyses, offering an overall picture of the major design choices of convolutional neural networks (CNNs) and practical considerations for achieving state-of-the-art left atrium segmentation. Results show the top method achieved a Dice score of 93.2% and a mean surface-to-surface distance of 0.7 mm, significantly outperforming the prior state of the art. In particular, our analysis demonstrated that two sequentially used CNNs, in which the first CNN performs automatic region-of-interest localization and the second performs refined regional segmentation, achieved far better results than traditional methods and pipelines containing a single CNN. This large-scale benchmarking study makes a significant step towards much-improved segmentation methods for cardiac LGE-MRIs and will serve as an important benchmark for evaluating and comparing future work in the field.</p>
<p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 7 May, 2020; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 26 April, 2020; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> April 2020.</p>
</li>
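<p>The benchmark's headline metric, the Dice score, is worth stating concretely: it is the overlap 2|A∩B| / (|A| + |B|) between a predicted and a reference mask. The toy masks below are random stand-ins:</p>
<pre><code># Dice overlap between two binary segmentation masks.
import numpy as np

def dice(a, b):
    a = a.astype(bool)
    b = b.astype(bool)
    return 2.0 * np.logical_and(a, b).sum() / (a.sum() + b.sum())

rng = np.random.default_rng(5)
gt = rng.integers(0, 2, (32, 32, 8))     # toy reference mask
pred = gt.copy()
pred[0, 0, 0] ^= 1                       # near-perfect prediction
print(round(dice(gt, pred), 4))
</code></pre>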
<li class="arxiv-result">
<div class="is-marginless">
<p class="list-title is-inline-block"><a href="https://arxiv.org/abs/1911.07917">arXiv:1911.07917</a> [<a href="https://arxiv.org/pdf/1911.07917">pdf</a>, <a href="https://arxiv.org/format/1911.07917">other</a>]</p>
<div class="tags is-inline-block"><span class="tag" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> <span class="tag" data-tooltip="Machine Learning">cs.LG</span> <span class="tag" data-tooltip="Sound">cs.SD</span> <span class="tag" data-tooltip="Audio and Speech Processing">eess.AS</span></div>
</div>
<p class="title is-5 mathjax">Cross-modal supervised learning for better acoustic representations</p>
<p class="authors"><span class="search-hit">Authors:</span> Shaoyong Jia, Xin Shu, Yang Yang, Dawei Liang, Qiyue Liu, Junhui Liu</p>
<p class="abstract mathjax"><span class="has-text-black-bis has-text-weight-semibold">Abstract:</span> Obtaining large-scale human-labeled datasets to train acoustic representation models is very challenging. In contrast, data with machine-generated labels is easy to collect. In this work, we propose to exploit machine-generated labels to learn better acoustic representations, based on the synchronization between vision and audio. First, we collect a large-scale video dataset with 15 million samples, totaling 16,320 hours. Each video is 3 to 5 seconds long and is annotated automatically by publicly available visual and audio classification models. Second, we train various classical convolutional neural networks (CNNs), including VGGish, ResNet-50, and MobileNet v2, and make several improvements to VGGish that achieve better results. Finally, we transfer our models to three external standard benchmarks for the audio classification task and achieve significant performance gains over state-of-the-art results. Models and code are available at: https://github.com/Deeperjia/vgg-like-audio-models.</p>
<p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 1 January, 2020; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 14 November, 2019; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> November 2019.</p>
</li>
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('1911.07917v2-abstract-full').style.display = 'none'; document.getElementById('1911.07917v2-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 1 January, 2020; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 14 November, 2019; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> November 2019. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/1911.05451">arXiv:1911.05451</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/1911.05451">pdf</a>, <a href="https://arxiv.org/format/1911.05451">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Image and Video Processing">eess.IV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Optics">physics.optics</span> </div> </div> <p class="title is-5 mathjax"> Gold Matrix Ghost Imaging </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&amp;query=Zhao%2C+X">Xiwei Zhao</a>, <a href="/search/eess?searchtype=author&amp;query=Wang%2C+X">Xue Wang</a>, <a href="/search/eess?searchtype=author&amp;query=Zhang%2C+W">Wenying Zhang</a>, <a href="/search/eess?searchtype=author&amp;query=Jia%2C+S">Shanshan Jia</a>, <a href="/search/eess?searchtype=author&amp;query=Zhou%2C+C">Cheng Zhou</a>, <a href="/search/eess?searchtype=author&amp;query=Song%2C+L">Lijun Song</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="1911.05451v1-abstract-short" style="display: inline;"> Light field modulation matrix is closely related to the quality of reconstructed image in ghost imaging. The orthogonality light field modulation matrix with better noise immunity and high quality reconstructed image is urgently needed in the practical application of ghost imaging. In this work, we propose a Gold matrix ghost imaging method with excellent imaging and anti-noise performance, which&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('1911.05451v1-abstract-full').style.display = 'inline'; document.getElementById('1911.05451v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="1911.05451v1-abstract-full" style="display: none;"> Light field modulation matrix is closely related to the quality of reconstructed image in ghost imaging. The orthogonality light field modulation matrix with better noise immunity and high quality reconstructed image is urgently needed in the practical application of ghost imaging. In this work, we propose a Gold matrix ghost imaging method with excellent imaging and anti-noise performance, which is proud of the pseudo-randomness and orthogonality of the deterministic Gold matrix. Numerical simulation and experimental results show that the Gold matrix has the characteristics of both random speckle and Hadamard matrix, and the reconstructed images are superior to the two modulation matrixes under the condition of noise, which shows better imaging robustness. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('1911.05451v1-abstract-full').style.display = 'none'; document.getElementById('1911.05451v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 11 November, 2019; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> November 2019. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">9 pages, 7 figures</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/1911.05033">arXiv:1911.05033</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/1911.05033">pdf</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Image and Video Processing">eess.IV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Multimedia">cs.MM</span> </div> <div class="is-inline-block" style="margin-left: 0.5rem"> <div class="tags has-addons"> <span class="tag is-dark is-size-7">doi</span> <span class="tag is-light is-size-7"><a class="" href="https://doi.org/10.1364/OE.383240">10.1364/OE.383240 <i class="fa fa-external-link" aria-hidden="true"></i></a></span> </div> </div> </div> <p class="title is-5 mathjax"> Visual cryptography in single-pixel imaging </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&amp;query=Jiao%2C+S">Shuming Jiao</a>, <a href="/search/eess?searchtype=author&amp;query=Feng%2C+J">Jun Feng</a>, <a href="/search/eess?searchtype=author&amp;query=Gao%2C+Y">Yang Gao</a>, <a href="/search/eess?searchtype=author&amp;query=Lei%2C+T">Ting Lei</a>, <a href="/search/eess?searchtype=author&amp;query=Yuan%2C+X">Xiaocong Yuan</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="1911.05033v1-abstract-short" style="display: inline;"> Two novel visual cryptography (VC) schemes are proposed by combining VC with single-pixel imaging (SPI) for the first time. It is pointed out that the overlapping of visual key images in VC is similar to the superposition of pixel intensities by a single-pixel detector in SPI. In the first scheme, QR-code VC is designed by using opaque sheets instead of transparent sheets. The secret image can be&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('1911.05033v1-abstract-full').style.display = 'inline'; document.getElementById('1911.05033v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="1911.05033v1-abstract-full" style="display: none;"> Two novel visual cryptography (VC) schemes are proposed by combining VC with single-pixel imaging (SPI) for the first time. It is pointed out that the overlapping of visual key images in VC is similar to the superposition of pixel intensities by a single-pixel detector in SPI. In the first scheme, QR-code VC is designed by using opaque sheets instead of transparent sheets. 
<li class="arxiv-result">
<div class="is-marginless">
<p class="list-title is-inline-block"><a href="https://arxiv.org/abs/1911.00353">arXiv:1911.00353</a> [<a href="https://arxiv.org/pdf/1911.00353">pdf</a>]</p>
<div class="tags is-inline-block"><span class="tag" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> <span class="tag" data-tooltip="Image and Video Processing">eess.IV</span></div>
<div class="tags has-addons"><span class="tag">doi</span><span class="tag"><a href="https://doi.org/10.1364/OE.382319">10.1364/OE.382319</a></span></div>
</div>
<p class="title is-5 mathjax">Does deep learning always outperform simple linear regression in optical imaging?</p>
<p class="authors"><span class="search-hit">Authors:</span> Shuming Jiao, Yang Gao, Jun Feng, Ting Lei, Xiaocong Yuan</p>
<p class="abstract mathjax"><span class="has-text-black-bis has-text-weight-semibold">Abstract:</span> Deep learning has been extensively applied to many optical imaging applications in recent years. Despite this success, the limitations and drawbacks of deep learning in optical imaging have seldom been investigated. In this work, we show that conventional linear-regression-based methods can, to some extent, outperform previously proposed deep learning approaches for two black-box optical imaging problems. Deep learning shows its weakness especially when the number of training samples is small. The advantages and disadvantages of linear-regression-based methods and deep learning are analyzed and compared. Since many optical systems are essentially linear, a deep learning network containing many nonlinear activation functions may not always be the most suitable option.</p>
<p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 17 January, 2020; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 31 October, 2019; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> November 2019.</p>
</li>
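<p>The paper's central comparison is easy to reproduce in spirit: fit a purely linear model to input-output pairs of a black-box imaging system by least squares, with no network at all. Everything below (the toy "system", the sizes, the data) is invented for illustration:</p>
<pre><code># Linear-regression baseline for a black-box imaging map: solve X @ W ~ T by least squares.
import numpy as np

rng = np.random.default_rng(7)
n_px, n_train = 64, 500
X = rng.random((n_train, n_px))                  # inputs (flattened images)
system = rng.standard_normal((n_px, n_px)) / n_px
T = np.tanh(X @ system)                          # outputs of a mildly nonlinear black box

W, *_ = np.linalg.lstsq(X, T, rcond=None)        # one-shot linear fit
err = np.linalg.norm(X @ W - T) / np.linalg.norm(T)
print("relative fitting error of the linear model:", err)
</code></pre>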
<li class="arxiv-result">
<div class="is-marginless">
<p class="list-title is-inline-block"><a href="https://arxiv.org/abs/1910.11222">arXiv:1910.11222</a> [<a href="https://arxiv.org/pdf/1910.11222">pdf</a>]</p>
<div class="tags is-inline-block"><span class="tag" data-tooltip="Image and Video Processing">eess.IV</span> <span class="tag" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span></div>
</div>
<p class="title is-5 mathjax">Data hiding in complex-amplitude modulation using a digital micromirror device</p>
<p class="authors"><span class="search-hit">Authors:</span> Shuming Jiao, Dongfang Zhang, Chonglei Zhang, Yang Gao, Ting Lei, Xiaocong Yuan</p>
<p class="abstract mathjax"><span class="has-text-black-bis has-text-weight-semibold">Abstract:</span> A digital micromirror device (DMD) is an amplitude-type spatial light modulator. However, complex-amplitude light modulation with a DMD can be achieved using the superpixel scheme. In the superpixel scheme, we notice that multiple different DMD local block patterns may correspond to the same complex superpixel value. Based on this inherent encoding redundancy, a large amount of external data can be embedded into the DMD pattern without extra cost. Meanwhile, the original complex light field information carried by the DMD pattern is fully preserved. The proposed scheme is favorable for applications such as secure information transmission and copyright protection.</p>
<p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 24 October, 2019; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2019.</p>
</li>
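<p>The encoding redundancy this abstract exploits can be demonstrated with a miniature "superpixel": if each of four mirrors contributes a fixed phasor when switched on, several on/off patterns produce the same complex sum, and the choice among them can carry hidden bits. This is a toy model of the idea, not the paper's actual superpixel geometry:</p>
<pre><code># Toy superpixel: count binary mirror patterns that realize the same complex value.
import numpy as np
from collections import defaultdict

phasors = np.exp(2j * np.pi * np.arange(4) / 4)     # per-mirror responses: 1, i, -1, -i
groups = defaultdict(list)
for m in range(16):                                 # all on/off patterns of the 4 mirrors
    bits = [int(c) for c in f"{m:04b}"]
    total = sum(b * p for b, p in zip(bits, phasors))
    value = complex(round(total.real, 6), round(total.imag, 6))
    groups[value].append(bits)

# The complex value 0 is realized by several distinct patterns; picking one of them
# encodes log2(#patterns) hidden bits while leaving the modulated light field unchanged.
equivalent = groups[complex(0.0, 0.0)]
print(len(equivalent), "patterns,", int(np.log2(len(equivalent))), "hidden bits per superpixel")
</code></pre>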
<li class="arxiv-result">
<div class="is-marginless">
<p class="list-title is-inline-block"><a href="https://arxiv.org/abs/1905.13594">arXiv:1905.13594</a> [<a href="https://arxiv.org/pdf/1905.13594">pdf</a>]</p>
<div class="tags is-inline-block"><span class="tag" data-tooltip="Image and Video Processing">eess.IV</span> <span class="tag" data-tooltip="Cryptography and Security">cs.CR</span> <span class="tag" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span></div>
</div>
<p class="title is-5 mathjax">Known-plaintext attack and ciphertext-only attack for encrypted single-pixel imaging</p>
<p class="authors"><span class="search-hit">Authors:</span> Shuming Jiao, Yang Gao, Ting Lei, Zhenwei Xie, Xiaocong Yuan</p>
<p class="abstract mathjax"><span class="has-text-black-bis has-text-weight-semibold">Abstract:</span> In many previous works, a single-pixel imaging (SPI) system is constructed as an optical image encryption system. Unauthorized users cannot reconstruct the plaintext image from the ciphertext intensity sequence without knowing the illumination pattern key. However, little cryptanalysis of encrypted SPI has been reported in the past. In this work, we propose a known-plaintext attack scheme and a ciphertext-only attack scheme against an encrypted SPI system for the first time. The known-plaintext attack is implemented by interchanging the roles of illumination patterns and object images in the SPI model. The ciphertext-only attack is implemented based on the statistical features of single-pixel intensity values. The two schemes can crack encrypted SPI systems and successfully recover the key containing the correct illumination patterns.</p>
<p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 31 May, 2019; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> May 2019.</p>
</li>
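<p>The known-plaintext attack, "interchanging the roles of illumination patterns and object images", amounts to solving a linear system for the key once enough plaintext/ciphertext pairs are known. A toy demonstration (sizes and data are invented; the ciphertext-only attack is statistical and not sketched here):</p>
<pre><code># Known-plaintext attack on toy single-pixel-imaging encryption: recover the pattern key.
import numpy as np

rng = np.random.default_rng(8)
n_px, K, N = 64, 96, 200                 # pixels, patterns in the key, known plaintexts
P = rng.random((K, n_px))                # secret illumination patterns (the key)
X = rng.random((n_px, N))                # known plaintext images, one per column
Y = P @ X                                # observed ciphertext intensity sequences

P_hat = Y @ np.linalg.pinv(X)            # least-squares role reversal: solve for the key
print("key recovery error:", np.linalg.norm(P_hat - P) / np.linalg.norm(P))
</code></pre>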
<li class="arxiv-result">
<div class="is-marginless">
<p class="list-title is-inline-block"><a href="https://arxiv.org/abs/1905.09525">arXiv:1905.09525</a> [<a href="https://arxiv.org/pdf/1905.09525">pdf</a>]</p>
<div class="tags is-inline-block"><span class="tag" data-tooltip="Image and Video Processing">eess.IV</span> <span class="tag" data-tooltip="Medical Physics">physics.med-ph</span></div>
</div>
<p class="title is-5 mathjax">Accelerating MR Imaging via Deep Chambolle-Pock Network</p>
<p class="authors"><span class="search-hit">Authors:</span> Haifeng Wang, Jing Cheng, Sen Jia, Zhilang Qiu, Caiyun Shi, Lixian Zou, Shi Su, Yuchou Chang, Yanjie Zhu, Leslie Ying, Dong Liang</p>
<p class="abstract mathjax"><span class="has-text-black-bis has-text-weight-semibold">Abstract:</span> Compressed sensing (CS) has been introduced to accelerate data acquisition in MR imaging. However, CS-MRI methods suffer from detail loss at large accelerations and complicated parameter selection. To address these limitations, a model-driven MR reconstruction method is proposed that trains a deep network, named CP-net, derived from the Chambolle-Pock algorithm, to reconstruct in vivo MR images of human brains from highly undersampled complex k-space data acquired on different types of MR scanners. The proposed deep network learns the proximal operators and parameters of the Chambolle-Pock algorithm. Experiments show that the proposed CP-net achieves more accurate MR reconstruction results, outperforming state-of-the-art methods across various quantitative metrics.</p>
<p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 23 May, 2019; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> May 2019.</p>
<p class="comments is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Comments:</span> 4 pages, 5 figures, 1 table, Accepted at 2019 IEEE 41st Engineering in Medicine and Biology Conference (EMBC 2019)</p>
</li>
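<p>The Chambolle-Pock primal-dual iteration that CP-net unrolls is compact enough to state in full. Below it solves a toy sparse recovery problem with fixed, hand-chosen step sizes; in CP-net the proximal operators and parameters become learned components:</p>
<pre><code># Chambolle-Pock iterations for min_x 0.5*||Ax - y||^2 + lam*||x||_1 (toy problem).
import numpy as np

rng = np.random.default_rng(9)
A = rng.standard_normal((40, 100))
x_true = np.zeros(100)
x_true[:5] = 2.0
y = A @ x_true
lam = 0.1

Lop = np.linalg.norm(A, 2)               # operator norm of A
tau = sigma = 0.95 / Lop                 # steps chosen so tau*sigma*||A||^2 stays below 1
x = np.zeros(100)
x_bar = x.copy()
z = np.zeros(40)
for _ in range(500):
    z = (z + sigma * (A @ x_bar - y)) / (1.0 + sigma)          # dual prox (quadratic term)
    v = x - tau * (A.T @ z)
    x_new = np.sign(v) * np.maximum(np.abs(v) - tau * lam, 0)  # primal prox (soft threshold)
    x_bar = 2.0 * x_new - x                                    # extrapolation, theta = 1
    x = x_new

print("relative error:", np.linalg.norm(x - x_true) / np.linalg.norm(x_true))
</code></pre>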
arXiv:1905.09525 [pdf] (eess.IV, physics.med-ph)
Accelerating MR Imaging via Deep Chambolle-Pock Network
Authors: Haifeng Wang, Jing Cheng, Sen Jia, Zhilang Qiu, Caiyun Shi, Lixian Zou, Shi Su, Yuchou Chang, Yanjie Zhu, Leslie Ying, Dong Liang
Abstract: Compressed sensing (CS) has been introduced to accelerate data acquisition in MR imaging. However, CS-MRI methods suffer from detail loss at large acceleration factors and from complicated parameter selection. To address these limitations of existing CS-MRI methods, a model-driven MR reconstruction method is proposed that trains a deep network, named CP-net, derived from the Chambolle-Pock algorithm, to reconstruct in vivo MR images of human brains from highly undersampled complex k-space data acquired on different types of MR scanners. The proposed deep network learns the proximal operator and the parameters of the Chambolle-Pock algorithm. The experiments show that the proposed CP-net achieves more accurate MR reconstruction results, outperforming state-of-the-art methods across various quantitative metrics.
Submitted 23 May, 2019; originally announced May 2019.
Comments: 4 pages, 5 figures, 1 table; accepted at the 2019 IEEE 41st Engineering in Medicine and Biology Conference (EMBC 2019).
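To make the unrolling concrete: the Chambolle-Pock (primal-dual) algorithm alternates a dual proximal step, a primal proximal step, and an extrapolation. Below is a plain, non-learned instance for a 1-D l1-regularized undersampled-Fourier problem; CP-net's idea is to unroll a fixed number of such iterations and replace the hand-coded proximal step and step sizes with trainable components. Everything here (operators, sizes, parameters) is an illustrative assumption, not the paper's code:

import numpy as np

rng = np.random.default_rng(1)
n = 64
x_true = np.zeros(n, dtype=complex)
x_true[rng.choice(n, 5, replace=False)] = rng.random(5) + 1  # sparse toy "image"

mask = rng.random(n) < 0.5                    # k-space undersampling pattern
F = lambda x: np.fft.fft(x, norm="ortho")
Finv = lambda z: np.fft.ifft(z, norm="ortho")
K = lambda x: mask * F(x)                     # undersampled Fourier operator, ||K|| <= 1
Kadj = lambda z: Finv(mask * z)               # its adjoint

y = K(x_true)                                 # measured k-space data
lam, tau, sigma, theta = 0.05, 0.9, 0.9, 1.0  # tau*sigma*||K||^2 < 1

def prox_l1(x, t):
    # Proximal operator of t*||x||_1 (complex soft-thresholding);
    # CP-net learns this map with a network instead of hand-coding it.
    mag = np.maximum(np.abs(x), 1e-12)
    return x * np.maximum(1 - t / mag, 0)

x = np.zeros(n, dtype=complex); xbar = x.copy(); u = np.zeros(n, dtype=complex)
for _ in range(200):
    u = (u + sigma * (K(xbar) - y)) / (1 + sigma)     # prox of the dual data term
    x_new = prox_l1(x - tau * Kadj(u), tau * lam)     # primal proximal step
    xbar = x_new + theta * (x_new - x)                # extrapolation
    x = x_new

print(np.linalg.norm(x - x_true) / np.linalg.norm(x_true))  # small relative error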
arXiv:1905.03109 (eess.SP, cs.CV, eess.IV)
Human Gait Database for Normal Walk Collected by Smartphone Accelerometer
Authors: Amir Vajdi, Mohammad Reza Zaghian, Nazli Rafei Dehkordi, Elham Rastegari, Kian Maroofi, Saman Farahmand, Shaohua Jia, Marc Pomplun, Nurit Haspel, Akram Bayat
Abstract: Gait recognition is the characterization of the unique biometric patterns associated with each individual, which can be utilized to identify a person without direct contact. A public gait database with a relatively large number of subjects provides a great opportunity for future studies to build and validate gait-authentication models. The goal of this study is to introduce a comprehensive gait database of 93 human subjects who walked between two endpoints (320 meters) during two different sessions, with gait data recorded by two smartphones, one attached to the right thigh and the other to the left side of the waist. The data were collected to be usable by deep learning methods, which require a sufficient number of time points. Metadata including the age, gender, smoking habits, daily exercise time, height, and weight of each individual are recorded. The data set is publicly available.
Submitted 16 May, 2023; v1 submitted 4 May, 2019; originally announced May 2019.
Comments: There was a lack of method description, and we suggest using the previous version of the article, where we provided a more extensive methodology.
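Accelerometer streams like these are typically cut into fixed-length, partially overlapping windows before being fed to a deep model. A small sketch of that preprocessing step (the synthetic signal, 100 Hz rate, and window sizes are illustrative assumptions, not the database's documented format):

import numpy as np

def sliding_windows(signal, win_len, hop):
    """Cut an (n_samples, 3) accelerometer stream into overlapping
    fixed-length windows suitable for a deep model."""
    starts = range(0, len(signal) - win_len + 1, hop)
    return np.stack([signal[s:s + win_len] for s in starts])

# Stand-in for one phone's recording: 60 s of (ax, ay, az) at 100 Hz.
rng = np.random.default_rng(2)
acc = rng.standard_normal((6000, 3))

windows = sliding_windows(acc, win_len=500, hop=250)  # 5 s windows, 50% overlap
print(windows.shape)  # (23, 500, 3)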
</p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">There was a lack of method description and we suggest to use the previous version of the article where we provided a more extensive methodology</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/1902.07985">arXiv:1902.07985</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/1902.07985">pdf</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Image and Video Processing">eess.IV</span> </div> <div class="is-inline-block" style="margin-left: 0.5rem"> <div class="tags has-addons"> <span class="tag is-dark is-size-7">doi</span> <span class="tag is-light is-size-7"><a class="" href="https://doi.org/10.1016/j.optcom.2020.125476">10.1016/j.optcom.2020.125476 <i class="fa fa-external-link" aria-hidden="true"></i></a></span> </div> </div> </div> <p class="title is-5 mathjax"> Multiple-image encryption and hiding with an optical diffractive neural network </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&amp;query=Gao%2C+Y">Yang Gao</a>, <a href="/search/eess?searchtype=author&amp;query=Jiao%2C+S">Shuming Jiao</a>, <a href="/search/eess?searchtype=author&amp;query=Fang%2C+J">Juncheng Fang</a>, <a href="/search/eess?searchtype=author&amp;query=Lei%2C+T">Ting Lei</a>, <a href="/search/eess?searchtype=author&amp;query=Xie%2C+Z">Zhenwei Xie</a>, <a href="/search/eess?searchtype=author&amp;query=Yuan%2C+X">Xiaocong Yuan</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="1902.07985v2-abstract-short" style="display: inline;"> A cascaded phase-only mask architecture (or an optical diffractive neural network) can be employed for different optical information processing tasks such as pattern recognition, orbital angular momentum (OAM) mode conversion, image salience detection and image encryption. However, for optical encryption and watermarking applications, such a system usually cannot process multiple pairs of input im&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('1902.07985v2-abstract-full').style.display = 'inline'; document.getElementById('1902.07985v2-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="1902.07985v2-abstract-full" style="display: none;"> A cascaded phase-only mask architecture (or an optical diffractive neural network) can be employed for different optical information processing tasks such as pattern recognition, orbital angular momentum (OAM) mode conversion, image salience detection and image encryption. However, for optical encryption and watermarking applications, such a system usually cannot process multiple pairs of input images and output images simultaneously. In our proposed scheme, multiple input images can be simultaneously fed to an optical diffractive neural network (DNN) system and each corresponding output image will be displayed in a non-overlap sub-region in the output imaging plane. 
arXiv:1806.03811 [pdf] (eess.IV, cs.CV)
Compression of phase-only holograms with JPEG standard and deep learning
Authors: Shuming Jiao, Zhi Jin, Chenliang Chang, Changyuan Zhou, Wenbin Zou, Xia Li
Abstract: It is a critical issue to reduce the enormous amount of data in the processing, storage, and transmission of a hologram in digital format. In photograph compression, the JPEG standard is supported by almost every system and device; it would therefore be favorable if the JPEG standard were applicable to hologram compression, with the advantage of universal compatibility. However, the image reconstructed from a JPEG-compressed hologram suffers from severe quality degradation, since some high-frequency features in the hologram are lost during compression. In this work, we employ a deep convolutional neural network to reduce the artifacts in a JPEG-compressed hologram. Simulation and experimental results reveal that our proposed "JPEG + deep learning" hologram compression scheme can achieve satisfactory reconstruction results for a computer-generated phase-only hologram after compression.
Submitted 11 June, 2018; originally announced June 2018.
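The two-stage pipeline is easy to picture in code: quantize the phase map to an 8-bit image, push it through an ordinary JPEG codec, then hand the decoded result to a convolutional network trained to suppress the compression artifacts. The sketch below wires up that pipeline with an untrained placeholder network; the paper's trained architecture and training data are not reproduced:

import numpy as np
from PIL import Image
import torch
import torch.nn as nn

# A phase-only hologram is a 2-D phase map in [0, 2*pi); map it to 8-bit
# grayscale so the standard JPEG codec can compress it.
phase = np.random.default_rng(3).uniform(0, 2 * np.pi, (256, 256))
img8 = np.uint8(phase / (2 * np.pi) * 255)

Image.fromarray(img8).save("holo.jpg", quality=30)          # lossy JPEG stage
decoded = np.asarray(Image.open("holo.jpg"), dtype=np.float32) / 255.0

# Artifact-reduction stage: a small residual CNN (untrained here; in the
# paper a trained network maps compressed holograms toward the originals).
net = nn.Sequential(
    nn.Conv2d(1, 32, 3, padding=1), nn.ReLU(),
    nn.Conv2d(32, 32, 3, padding=1), nn.ReLU(),
    nn.Conv2d(32, 1, 3, padding=1),
)
x = torch.from_numpy(decoded)[None, None]                   # (1, 1, H, W)
restored = (x + net(x)).detach().numpy()[0, 0]              # residual correction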
arXiv:1806.01340 [pdf] (eess.IV, cs.CV)
Design of optimal illumination patterns in single-pixel imaging using image dictionaries
Authors: Jun Feng, Shuming Jiao, Yang Gao, Ting Lei, Xiaocong Yuan
Abstract: Single-pixel imaging (SPI) has a major drawback: many sequential illuminations are required to capture a single image, leading to long acquisition times. Basis illumination patterns such as Fourier and Hadamard patterns achieve much better imaging efficiency than random patterns, but the performance is still sub-optimal, since the basis patterns are fixed and non-adaptive to varying object images. This Letter proposes a novel scheme for designing and optimizing the illumination patterns adaptively from an image dictionary, by extracting the common image features using principal component analysis (PCA). Simulation and experimental results reveal that our proposed scheme outperforms conventional Fourier SPI in terms of imaging efficiency.
Submitted 17 January, 2020; v1 submitted 4 June, 2018; originally announced June 2018.
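The PCA construction itself fits in a few lines: stack the dictionary images as rows, take the leading right-singular vectors of the mean-centred matrix as illumination patterns, and reconstruct a new object by projecting its measurements back onto that basis. A toy NumPy sketch (random stand-in data; physical DMD patterns would additionally have to be made non-negative, e.g. via differential measurements):

import numpy as np

# Hypothetical dictionary: rows are flattened training images that
# resemble the objects to be imaged (random stand-ins here).
rng = np.random.default_rng(4)
dictionary = rng.random((500, 32 * 32))

# PCA via SVD of the mean-centred dictionary: the leading principal
# components capture the common image features.
mean = dictionary.mean(axis=0)
_, _, Vt = np.linalg.svd(dictionary - mean, full_matrices=False)
k = 200                                   # measurements, far fewer than 1024 pixels
patterns = Vt[:k]                         # adaptive illumination patterns

# SPI acquisition and reconstruction for a new object image
# (the mean offset would cost one extra measurement in practice).
obj = rng.random(32 * 32)
measurements = patterns @ (obj - mean)    # single-pixel readings
recon = mean + patterns.T @ measurements  # projection onto the PCA basis
print(np.linalg.norm(recon - obj) / np.linalg.norm(obj))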
arXiv:1412.7911 [pdf] (eess.SY, cs.SI, physics.soc-ph) doi:10.1109/CCDC.2014.6852245
Improving controllability of complex networks by rewiring links regularly
Authors: Jiuqiang Xu, Jinfa Wang, Hai Zhao, Siyuan Jia
Abstract: Network science has constantly been a focus of research over the last decade, with considerable advances in the controllability of network structures. However, much less effort has been devoted to studying how to improve the controllability of complex networks. In this paper, a new algorithm is proposed that improves the controllability of complex networks by rewiring links regularly, i.e., by transforming the network structure. Numerical simulation experiments on typical network models (Erdős-Rényi and scale-free networks) demonstrate that the algorithm is very effective. We find that its performance is mainly determined by the average degree and by a positive correlation between the in-degree and out-degree of the network, and that it is independent of the network size. Furthermore, we analyze and discuss the correlation between the controllability of complex networks and two degree-distribution indices: the power-law exponent and the heterogeneity.
Submitted 26 December, 2014; originally announced December 2014.
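For a sense of the quantity being improved: in structural controllability (Liu et al.), the minimum number of driver nodes of a directed network equals max(N - |M*|, 1), where M* is a maximum matching of the network's bipartite representation; a rewiring step "improves controllability" when it enlarges the matching and thus shrinks the driver set. A sketch of that metric with networkx (the paper's specific regular-rewiring algorithm is not reproduced):

import networkx as nx
from networkx.algorithms import bipartite

def n_driver_nodes(G):
    """Minimum number of driver nodes of a directed network:
    N_D = max(N - |maximum matching|, 1)."""
    # Bipartite representation: out-copy of u linked to in-copy of v
    # for every directed edge u -> v.
    B = nx.Graph()
    out_nodes = [(u, "out") for u in G]
    B.add_nodes_from(out_nodes)
    B.add_nodes_from((v, "in") for v in G)
    B.add_edges_from(((u, "out"), (v, "in")) for u, v in G.edges())
    matching = bipartite.hopcroft_karp_matching(B, top_nodes=out_nodes)
    matched_edges = len(matching) // 2  # the dict stores both directions
    return max(G.number_of_nodes() - matched_edges, 1)

G = nx.gnp_random_graph(200, 0.02, seed=5, directed=True)  # Erdős-Rényi
print(n_driver_nodes(G))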
href="https://info.arxiv.org/help/web_accessibility.html">Web Accessibility Assistance</a></li> <li> <p class="help"> <a class="a11y-main-link" href="https://status.arxiv.org" target="_blank">arXiv Operational Status <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 256 512" class="icon filter-dark_grey" role="presentation"><path d="M224.3 273l-136 136c-9.4 9.4-24.6 9.4-33.9 0l-22.6-22.6c-9.4-9.4-9.4-24.6 0-33.9l96.4-96.4-96.4-96.4c-9.4-9.4-9.4-24.6 0-33.9L54.3 103c9.4-9.4 24.6-9.4 33.9 0l136 136c9.5 9.4 9.5 24.6.1 34z"/></svg></a><br> Get status notifications via <a class="is-link" href="https://subscribe.sorryapp.com/24846f03/email/new" target="_blank"><svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 512 512" class="icon filter-black" role="presentation"><path d="M502.3 190.8c3.9-3.1 9.7-.2 9.7 4.7V400c0 26.5-21.5 48-48 48H48c-26.5 0-48-21.5-48-48V195.6c0-5 5.7-7.8 9.7-4.7 22.4 17.4 52.1 39.5 154.1 113.6 21.1 15.4 56.7 47.8 92.2 47.6 35.7.3 72-32.8 92.3-47.6 102-74.1 131.6-96.3 154-113.7zM256 320c23.2.4 56.6-29.2 73.4-41.4 132.7-96.3 142.8-104.7 173.4-128.7 5.8-4.5 9.2-11.5 9.2-18.9v-19c0-26.5-21.5-48-48-48H48C21.5 64 0 85.5 0 112v19c0 7.4 3.4 14.3 9.2 18.9 30.6 23.9 40.7 32.4 173.4 128.7 16.8 12.2 50.2 41.8 73.4 41.4z"/></svg>email</a> or <a class="is-link" href="https://subscribe.sorryapp.com/24846f03/slack/new" target="_blank"><svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 448 512" class="icon filter-black" role="presentation"><path d="M94.12 315.1c0 25.9-21.16 47.06-47.06 47.06S0 341 0 315.1c0-25.9 21.16-47.06 47.06-47.06h47.06v47.06zm23.72 0c0-25.9 21.16-47.06 47.06-47.06s47.06 21.16 47.06 47.06v117.84c0 25.9-21.16 47.06-47.06 47.06s-47.06-21.16-47.06-47.06V315.1zm47.06-188.98c-25.9 0-47.06-21.16-47.06-47.06S139 32 164.9 32s47.06 21.16 47.06 47.06v47.06H164.9zm0 23.72c25.9 0 47.06 21.16 47.06 47.06s-21.16 47.06-47.06 47.06H47.06C21.16 243.96 0 222.8 0 196.9s21.16-47.06 47.06-47.06H164.9zm188.98 47.06c0-25.9 21.16-47.06 47.06-47.06 25.9 0 47.06 21.16 47.06 47.06s-21.16 47.06-47.06 47.06h-47.06V196.9zm-23.72 0c0 25.9-21.16 47.06-47.06 47.06-25.9 0-47.06-21.16-47.06-47.06V79.06c0-25.9 21.16-47.06 47.06-47.06 25.9 0 47.06 21.16 47.06 47.06V196.9zM283.1 385.88c25.9 0 47.06 21.16 47.06 47.06 0 25.9-21.16 47.06-47.06 47.06-25.9 0-47.06-21.16-47.06-47.06v-47.06h47.06zm0-23.72c-25.9 0-47.06-21.16-47.06-47.06 0-25.9 21.16-47.06 47.06-47.06h117.84c25.9 0 47.06 21.16 47.06 47.06 0 25.9-21.16 47.06-47.06 47.06H283.1z"/></svg>slack</a> </p> </li> </ul> </div> </div> </div> <!-- end MetaColumn 2 --> </div> </footer> <script src="https://static.arxiv.org/static/base/1.0.0a5/js/member_acknowledgement.js"></script> </body> </html>
