CINXE.COM
Search | arXiv e-print repository
<!DOCTYPE html> <html lang="en"> <head> <meta charset="utf-8"/> <meta name="viewport" content="width=device-width, initial-scale=1"/> <!-- new favicon config and versions by realfavicongenerator.net --> <link rel="apple-touch-icon" sizes="180x180" href="https://static.arxiv.org/static/base/1.0.0a5/images/icons/apple-touch-icon.png"> <link rel="icon" type="image/png" sizes="32x32" href="https://static.arxiv.org/static/base/1.0.0a5/images/icons/favicon-32x32.png"> <link rel="icon" type="image/png" sizes="16x16" href="https://static.arxiv.org/static/base/1.0.0a5/images/icons/favicon-16x16.png"> <link rel="manifest" href="https://static.arxiv.org/static/base/1.0.0a5/images/icons/site.webmanifest"> <link rel="mask-icon" href="https://static.arxiv.org/static/base/1.0.0a5/images/icons/safari-pinned-tab.svg" color="#b31b1b"> <link rel="shortcut icon" href="https://static.arxiv.org/static/base/1.0.0a5/images/icons/favicon.ico"> <meta name="msapplication-TileColor" content="#b31b1b"> <meta name="msapplication-config" content="images/icons/browserconfig.xml"> <meta name="theme-color" content="#b31b1b"> <!-- end favicon config --> <title>Search | arXiv e-print repository</title> <script defer src="https://static.arxiv.org/static/base/1.0.0a5/fontawesome-free-5.11.2-web/js/all.js"></script> <link rel="stylesheet" href="https://static.arxiv.org/static/base/1.0.0a5/css/arxivstyle.css" /> <script type="text/x-mathjax-config"> MathJax.Hub.Config({ messageStyle: "none", extensions: ["tex2jax.js"], jax: ["input/TeX", "output/HTML-CSS"], tex2jax: { inlineMath: [ ['$','$'], ["\\(","\\)"] ], displayMath: [ ['$$','$$'], ["\\[","\\]"] ], processEscapes: true, ignoreClass: '.*', processClass: 'mathjax.*' }, TeX: { extensions: ["AMSmath.js", "AMSsymbols.js", "noErrors.js"], noErrors: { inlineDelimiters: ["$","$"], multiLine: false, style: { "font-size": "normal", "border": "" } } }, "HTML-CSS": { availableFonts: ["TeX"] } }); </script> <script src='//static.arxiv.org/MathJax-2.7.3/MathJax.js'></script> <script src="https://static.arxiv.org/static/base/1.0.0a5/js/notification.js"></script> <link rel="stylesheet" href="https://static.arxiv.org/static/search/0.5.6/css/bulma-tooltip.min.css" /> <link rel="stylesheet" href="https://static.arxiv.org/static/search/0.5.6/css/search.css" /> <script src="https://code.jquery.com/jquery-3.2.1.slim.min.js" integrity="sha256-k2WSCIexGzOj3Euiig+TlR8gA0EmPjuc79OEeY5L45g=" crossorigin="anonymous"></script> <script src="https://static.arxiv.org/static/search/0.5.6/js/fieldset.js"></script> <style> radio#cf-customfield_11400 { display: none; } </style> </head> <body> <header><a href="#main-container" class="is-sr-only">Skip to main content</a> <!-- contains Cornell logo and sponsor statement --> <div class="attribution level is-marginless" role="banner"> <div class="level-left"> <a class="level-item" href="https://cornell.edu/"><img src="https://static.arxiv.org/static/base/1.0.0a5/images/cornell-reduced-white-SMALL.svg" alt="Cornell University" width="200" aria-label="logo" /></a> </div> <div class="level-right is-marginless"><p class="sponsors level-item is-marginless"><span id="support-ack-url">We gratefully acknowledge support from<br /> the Simons Foundation, <a href="https://info.arxiv.org/about/ourmembers.html">member institutions</a>, and all contributors. <a href="https://info.arxiv.org/about/donate.html">Donate</a></span></p></div> </div> <!-- contains arXiv identity and search bar --> <div class="identity level is-marginless"> <div class="level-left"> <div class="level-item"> <a class="arxiv" href="https://arxiv.org/" aria-label="arxiv-logo"> <img src="https://static.arxiv.org/static/base/1.0.0a5/images/arxiv-logo-one-color-white.svg" aria-label="logo" alt="arxiv logo" width="85" style="width:85px;"/> </a> </div> </div> <div class="search-block level-right"> <form class="level-item mini-search" method="GET" action="https://arxiv.org/search"> <div class="field has-addons"> <div class="control"> <input class="input is-small" type="text" name="query" placeholder="Search..." aria-label="Search term or terms" /> <p class="help"><a href="https://info.arxiv.org/help">Help</a> | <a href="https://arxiv.org/search/advanced">Advanced Search</a></p> </div> <div class="control"> <div class="select is-small"> <select name="searchtype" aria-label="Field to search"> <option value="all" selected="selected">All fields</option> <option value="title">Title</option> <option value="author">Author</option> <option value="abstract">Abstract</option> <option value="comments">Comments</option> <option value="journal_ref">Journal reference</option> <option value="acm_class">ACM classification</option> <option value="msc_class">MSC classification</option> <option value="report_num">Report number</option> <option value="paper_id">arXiv identifier</option> <option value="doi">DOI</option> <option value="orcid">ORCID</option> <option value="author_id">arXiv author ID</option> <option value="help">Help pages</option> <option value="full_text">Full text</option> </select> </div> </div> <input type="hidden" name="source" value="header"> <button class="button is-small is-cul-darker">Search</button> </div> </form> </div> </div> <!-- closes identity --> <div class="container"> <div class="user-tools is-size-7 has-text-right has-text-weight-bold" role="navigation" aria-label="User menu"> <a href="https://arxiv.org/login">Login</a> </div> </div> </header> <main class="container" id="main-container"> <div class="level is-marginless"> <div class="level-left"> <h1 class="title is-clearfix"> Showing 1–14 of 14 results for author: <span class="mathjax">Kong, D</span> </h1> </div> <div class="level-right is-hidden-mobile"> <!-- feedback for mobile is moved to footer --> <span class="help" style="display: inline-block;"><a href="https://github.com/arXiv/arxiv-search/releases">Search v0.5.6 released 2020-02-24</a> </span> </div> </div> <div class="content"> <form method="GET" action="/search/eess" aria-role="search"> Searching in archive <strong>eess</strong>. <a href="/search/?searchtype=author&query=Kong%2C+D">Search in all archives.</a> <div class="field has-addons-tablet"> <div class="control is-expanded"> <label for="query" class="hidden-label">Search term or terms</label> <input class="input is-medium" id="query" name="query" placeholder="Search term..." type="text" value="Kong, D"> </div> <div class="select control is-medium"> <label class="is-hidden" for="searchtype">Field</label> <select class="is-medium" id="searchtype" name="searchtype"><option value="all">All fields</option><option value="title">Title</option><option selected value="author">Author(s)</option><option value="abstract">Abstract</option><option value="comments">Comments</option><option value="journal_ref">Journal reference</option><option value="acm_class">ACM classification</option><option value="msc_class">MSC classification</option><option value="report_num">Report number</option><option value="paper_id">arXiv identifier</option><option value="doi">DOI</option><option value="orcid">ORCID</option><option value="license">License (URI)</option><option value="author_id">arXiv author ID</option><option value="help">Help pages</option><option value="full_text">Full text</option></select> </div> <div class="control"> <button class="button is-link is-medium">Search</button> </div> </div> <div class="field"> <div class="control is-size-7"> <label class="radio"> <input checked id="abstracts-0" name="abstracts" type="radio" value="show"> Show abstracts </label> <label class="radio"> <input id="abstracts-1" name="abstracts" type="radio" value="hide"> Hide abstracts </label> </div> </div> <div class="is-clearfix" style="height: 2.5em"> <div class="is-pulled-right"> <a href="/search/advanced?terms-0-term=Kong%2C+D&terms-0-field=author&size=50&order=-announced_date_first">Advanced Search</a> </div> </div> <input type="hidden" name="order" value="-announced_date_first"> <input type="hidden" name="size" value="50"> </form> <div class="level breathe-horizontal"> <div class="level-left"> <form method="GET" action="/search/"> <div style="display: none;"> <select id="searchtype" name="searchtype"><option value="all">All fields</option><option value="title">Title</option><option selected value="author">Author(s)</option><option value="abstract">Abstract</option><option value="comments">Comments</option><option value="journal_ref">Journal reference</option><option value="acm_class">ACM classification</option><option value="msc_class">MSC classification</option><option value="report_num">Report number</option><option value="paper_id">arXiv identifier</option><option value="doi">DOI</option><option value="orcid">ORCID</option><option value="license">License (URI)</option><option value="author_id">arXiv author ID</option><option value="help">Help pages</option><option value="full_text">Full text</option></select> <input id="query" name="query" type="text" value="Kong, D"> <ul id="abstracts"><li><input checked id="abstracts-0" name="abstracts" type="radio" value="show"> <label for="abstracts-0">Show abstracts</label></li><li><input id="abstracts-1" name="abstracts" type="radio" value="hide"> <label for="abstracts-1">Hide abstracts</label></li></ul> </div> <div class="box field is-grouped is-grouped-multiline level-item"> <div class="control"> <span class="select is-small"> <select id="size" name="size"><option value="25">25</option><option selected value="50">50</option><option value="100">100</option><option value="200">200</option></select> </span> <label for="size">results per page</label>. </div> <div class="control"> <label for="order">Sort results by</label> <span class="select is-small"> <select id="order" name="order"><option selected value="-announced_date_first">Announcement date (newest first)</option><option value="announced_date_first">Announcement date (oldest first)</option><option value="-submitted_date">Submission date (newest first)</option><option value="submitted_date">Submission date (oldest first)</option><option value="">Relevance</option></select> </span> </div> <div class="control"> <button class="button is-small is-link">Go</button> </div> </div> </form> </div> </div> <ol class="breathe-horizontal" start="1"> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2410.02514">arXiv:2410.02514</a> <span> [<a href="https://arxiv.org/pdf/2410.02514">pdf</a>, <a href="https://arxiv.org/format/2410.02514">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Signal Processing">eess.SP</span> </div> </div> <p class="title is-5 mathjax"> Propagation Distance Estimation for Radio over Fiber with Cascaded Structure </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&query=Kong%2C+D">Dexin Kong</a>, <a href="/search/eess?searchtype=author&query=Osorio%2C+D+P+M">Diana Pamela Moya Osorio</a>, <a href="/search/eess?searchtype=author&query=Larsson%2C+E+G">Erik G. Larsson</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2410.02514v1-abstract-short" style="display: inline;"> Recent developments in polymer microwave fiber (PMF) have opened great opportunities for robust, low-cost, and high-speed sub-terahertz (THz) communications. Noticing this great potential, this paper addresses the problem of estimation of the propagation distance of a sub-Thz signal along a radio over fiber structure. Particularly, this paper considers a novel cascaded structure that interconnects… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.02514v1-abstract-full').style.display = 'inline'; document.getElementById('2410.02514v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2410.02514v1-abstract-full" style="display: none;"> Recent developments in polymer microwave fiber (PMF) have opened great opportunities for robust, low-cost, and high-speed sub-terahertz (THz) communications. Noticing this great potential, this paper addresses the problem of estimation of the propagation distance of a sub-Thz signal along a radio over fiber structure. Particularly, this paper considers a novel cascaded structure that interconnects multiple radio units (RUs) via fiber for applications in indoor scenarios. Herein, we consider the cascaded effects of distortions introduced by non-linear power amplifiers at the RUs, and the propagation channel over the fiber is based on measurements obtained from transmissions of sub-THz signals on high-density polyethylene fibers. For the estimation of the propagation distance, non-linear least-squares algorithms are proposed, and our simulation results demonstrate that the proposed estimators present a good performance on the propagation distance estimation even in the presence of the cascaded effect of non-linear PAs. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.02514v1-abstract-full').style.display = 'none'; document.getElementById('2410.02514v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 3 October, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">in proceedings of 25th IEEE International Workshop on Signal Processing Advances in Wireless Communications (SPAWC)</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2403.15853">arXiv:2403.15853</a> <span> [<a href="https://arxiv.org/pdf/2403.15853">pdf</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Image and Video Processing">eess.IV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> An edge detection-based deep learning approach for tear meniscus height measurement </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&query=Wang%2C+K">Kesheng Wang</a>, <a href="/search/eess?searchtype=author&query=Xu%2C+K">Kunhui Xu</a>, <a href="/search/eess?searchtype=author&query=Chen%2C+X">Xiaoyu Chen</a>, <a href="/search/eess?searchtype=author&query=He%2C+C">Chunlei He</a>, <a href="/search/eess?searchtype=author&query=Zhang%2C+J">Jianfeng Zhang</a>, <a href="/search/eess?searchtype=author&query=Kong%2C+D">Dexing Kong</a>, <a href="/search/eess?searchtype=author&query=Dai%2C+Q">Qi Dai</a>, <a href="/search/eess?searchtype=author&query=Huang%2C+S">Shoujun Huang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2403.15853v1-abstract-short" style="display: inline;"> Automatic measurements of tear meniscus height (TMH) have been achieved by using deep learning techniques; however, annotation is significantly influenced by subjective factors and is both time-consuming and labor-intensive. In this paper, we introduce an automatic TMH measurement technique based on edge detection-assisted annotation within a deep learning framework. This method generates mask lab… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2403.15853v1-abstract-full').style.display = 'inline'; document.getElementById('2403.15853v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2403.15853v1-abstract-full" style="display: none;"> Automatic measurements of tear meniscus height (TMH) have been achieved by using deep learning techniques; however, annotation is significantly influenced by subjective factors and is both time-consuming and labor-intensive. In this paper, we introduce an automatic TMH measurement technique based on edge detection-assisted annotation within a deep learning framework. This method generates mask labels less affected by subjective factors with enhanced efficiency compared to previous annotation approaches. For improved segmentation of the pupil and tear meniscus areas, the convolutional neural network Inceptionv3 was first implemented as an image quality assessment model, effectively identifying higher-quality images with an accuracy of 98.224%. Subsequently, by using the generated labels, various algorithms, including Unet, ResUnet, Deeplabv3+FcnResnet101, Deeplabv3+FcnResnet50, FcnResnet50, and FcnResnet101 were trained, with Unet demonstrating the best performance. Finally, Unet was used for automatic pupil and tear meniscus segmentation to locate the center of the pupil and calculate TMH,respectively. An evaluation of the mask quality predicted by Unet indicated a Mean Intersection over Union of 0.9362, a recall of 0.9261, a precision of 0.9423, and an F1-Score of 0.9326. Additionally, the TMH predicted by the model was assessed, with the fitting curve represented as y= 0.982x-0.862, an overall correlation coefficient of r^2=0.961 , and an accuracy of 94.80% (237/250). In summary, the algorithm can automatically screen images based on their quality,segment the pupil and tear meniscus areas, and automatically measure TMH. Measurement results using the AI algorithm demonstrate a high level of consistency with manual measurements, offering significant support to clinical doctors in diagnosing dry eye disease. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2403.15853v1-abstract-full').style.display = 'none'; document.getElementById('2403.15853v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 23 March, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> March 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">22 pages, 5 figures</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2312.16419">arXiv:2312.16419</a> <span> [<a href="https://arxiv.org/pdf/2312.16419">pdf</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Signal Processing">eess.SP</span> </div> </div> <p class="title is-5 mathjax"> Radar detection of wake vortex behind the aircraft: the detection range problem </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&query=Gong%2C+J">Jiangkun Gong</a>, <a href="/search/eess?searchtype=author&query=Yan%2C+J">Jun Yan</a>, <a href="/search/eess?searchtype=author&query=Kong%2C+D">Deyong Kong</a>, <a href="/search/eess?searchtype=author&query=Li%2C+D">Deren Li</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2312.16419v1-abstract-short" style="display: inline;"> In this study, we showcased the detection of the wake vortex produced by a medium aircraft at distances exceeding 10 km using an X-band pulse-Doppler radar. We analyzed radar signals within the range profiles behind a Boeing 737 aircraft on February 7, 2021, within the airspace of the Runway Protection Zone (RPZ) at Tianhe Airport, Wuhan, China. The findings revealed that the wake vortex extended… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2312.16419v1-abstract-full').style.display = 'inline'; document.getElementById('2312.16419v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2312.16419v1-abstract-full" style="display: none;"> In this study, we showcased the detection of the wake vortex produced by a medium aircraft at distances exceeding 10 km using an X-band pulse-Doppler radar. We analyzed radar signals within the range profiles behind a Boeing 737 aircraft on February 7, 2021, within the airspace of the Runway Protection Zone (RPZ) at Tianhe Airport, Wuhan, China. The findings revealed that the wake vortex extended up to 6 km from the aircraft, which is 10 km from the radar, displaying distinct stages characterized by scattering patterns and Doppler signatures. Despite the wake vortex exhibiting a scattering power approximately 10 dB lower than that of the aircraft, its Doppler Signal-to-Clutter Ratio (DSCR) values were only 5 dB lower, indicating a notably strong scattering power within a single radar bin. Additionally, certain radar parameters proved inconsistent in the stable detection and tracking of wake vortex, aligning with our earlier concept of cognitive micro-Doppler radar. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2312.16419v1-abstract-full').style.display = 'none'; document.getElementById('2312.16419v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 27 December, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> December 2023. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2311.11745">arXiv:2311.11745</a> <span> [<a href="https://arxiv.org/pdf/2311.11745">pdf</a>, <a href="https://arxiv.org/format/2311.11745">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Sound">cs.SD</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Audio and Speech Processing">eess.AS</span> </div> </div> <p class="title is-5 mathjax"> ELF: Encoding Speaker-Specific Latent Speech Feature for Speech Synthesis </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&query=Kong%2C+J">Jungil Kong</a>, <a href="/search/eess?searchtype=author&query=Lee%2C+J">Junmo Lee</a>, <a href="/search/eess?searchtype=author&query=Kim%2C+J">Jeongmin Kim</a>, <a href="/search/eess?searchtype=author&query=Kim%2C+B">Beomjeong Kim</a>, <a href="/search/eess?searchtype=author&query=Park%2C+J">Jihoon Park</a>, <a href="/search/eess?searchtype=author&query=Kong%2C+D">Dohee Kong</a>, <a href="/search/eess?searchtype=author&query=Lee%2C+C">Changheon Lee</a>, <a href="/search/eess?searchtype=author&query=Kim%2C+S">Sangjin Kim</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2311.11745v2-abstract-short" style="display: inline;"> In this work, we propose a novel method for modeling numerous speakers, which enables expressing the overall characteristics of speakers in detail like a trained multi-speaker model without additional training on the target speaker's dataset. Although various works with similar purposes have been actively studied, their performance has not yet reached that of trained multi-speaker models due to th… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2311.11745v2-abstract-full').style.display = 'inline'; document.getElementById('2311.11745v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2311.11745v2-abstract-full" style="display: none;"> In this work, we propose a novel method for modeling numerous speakers, which enables expressing the overall characteristics of speakers in detail like a trained multi-speaker model without additional training on the target speaker's dataset. Although various works with similar purposes have been actively studied, their performance has not yet reached that of trained multi-speaker models due to their fundamental limitations. To overcome previous limitations, we propose effective methods for feature learning and representing target speakers' speech characteristics by discretizing the features and conditioning them to a speech synthesis model. Our method obtained a significantly higher similarity mean opinion score (SMOS) in subjective similarity evaluation than seen speakers of a high-performance multi-speaker model, even with unseen speakers. The proposed method also outperforms a zero-shot method by significant margins. Furthermore, our method shows remarkable performance in generating new artificial speakers. In addition, we demonstrate that the encoded latent features are sufficiently informative to reconstruct an original speaker's speech completely. It implies that our method can be used as a general methodology to encode and reconstruct speakers' characteristics in various tasks. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2311.11745v2-abstract-full').style.display = 'none'; document.getElementById('2311.11745v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 31 May, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 20 November, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> November 2023. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">ICML 2024</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2310.14769">arXiv:2310.14769</a> <span> [<a href="https://arxiv.org/pdf/2310.14769">pdf</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Signal Processing">eess.SP</span> </div> </div> <p class="title is-5 mathjax"> An introduction to radar Automatic Target Recognition (ATR) technology in ground-based radar systems </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&query=Gong%2C+J">Jiangkun Gong</a>, <a href="/search/eess?searchtype=author&query=Yan%2C+J">Jun Yan</a>, <a href="/search/eess?searchtype=author&query=Kong%2C+D">Deyong Kong</a>, <a href="/search/eess?searchtype=author&query=Li%2C+D">Deren Li</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2310.14769v1-abstract-short" style="display: inline;"> This paper presents a brief examination of Automatic Target Recognition (ATR) technology within ground-based radar systems. It offers a lucid comprehension of the ATR concept, delves into its historical milestones, and categorizes ATR methods according to different scattering regions. By incorporating ATR solutions into radar systems, this study demonstrates the expansion of radar detection ranges… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2310.14769v1-abstract-full').style.display = 'inline'; document.getElementById('2310.14769v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2310.14769v1-abstract-full" style="display: none;"> This paper presents a brief examination of Automatic Target Recognition (ATR) technology within ground-based radar systems. It offers a lucid comprehension of the ATR concept, delves into its historical milestones, and categorizes ATR methods according to different scattering regions. By incorporating ATR solutions into radar systems, this study demonstrates the expansion of radar detection ranges and the enhancement of tracking capabilities, leading to superior situational awareness. Drawing insights from the Russo-Ukrainian War, the paper highlights three pressing radar applications that urgently necessitate ATR technology: detecting stealth aircraft, countering small drones, and implementing anti-jamming measures. Anticipating the next wave of radar ATR research, the study predicts a surge in cognitive radar and machine learning (ML)-driven algorithms. These emerging methodologies aspire to confront challenges associated with system adaptation, real-time recognition, and environmental adaptability. Ultimately, ATR stands poised to revolutionize conventional radar systems, ushering in an era of 4D sensing capabilities. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2310.14769v1-abstract-full').style.display = 'none'; document.getElementById('2310.14769v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 23 October, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2023. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2310.06339">arXiv:2310.06339</a> <span> [<a href="https://arxiv.org/pdf/2310.06339">pdf</a>, <a href="https://arxiv.org/format/2310.06339">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Image and Video Processing">eess.IV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> Automatic nodule identification and differentiation in ultrasound videos to facilitate per-nodule examination </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&query=Jiang%2C+S">Siyuan Jiang</a>, <a href="/search/eess?searchtype=author&query=Ding%2C+Y">Yan Ding</a>, <a href="/search/eess?searchtype=author&query=Wang%2C+Y">Yuling Wang</a>, <a href="/search/eess?searchtype=author&query=Xu%2C+L">Lei Xu</a>, <a href="/search/eess?searchtype=author&query=Dai%2C+W">Wenli Dai</a>, <a href="/search/eess?searchtype=author&query=Chang%2C+W">Wanru Chang</a>, <a href="/search/eess?searchtype=author&query=Zhang%2C+J">Jianfeng Zhang</a>, <a href="/search/eess?searchtype=author&query=Yu%2C+J">Jie Yu</a>, <a href="/search/eess?searchtype=author&query=Zhou%2C+J">Jianqiao Zhou</a>, <a href="/search/eess?searchtype=author&query=Zhang%2C+C">Chunquan Zhang</a>, <a href="/search/eess?searchtype=author&query=Liang%2C+P">Ping Liang</a>, <a href="/search/eess?searchtype=author&query=Kong%2C+D">Dexing Kong</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2310.06339v1-abstract-short" style="display: inline;"> Ultrasound is a vital diagnostic technique in health screening, with the advantages of non-invasive, cost-effective, and radiation free, and therefore is widely applied in the diagnosis of nodules. However, it relies heavily on the expertise and clinical experience of the sonographer. In ultrasound images, a single nodule might present heterogeneous appearances in different cross-sectional views w… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2310.06339v1-abstract-full').style.display = 'inline'; document.getElementById('2310.06339v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2310.06339v1-abstract-full" style="display: none;"> Ultrasound is a vital diagnostic technique in health screening, with the advantages of non-invasive, cost-effective, and radiation free, and therefore is widely applied in the diagnosis of nodules. However, it relies heavily on the expertise and clinical experience of the sonographer. In ultrasound images, a single nodule might present heterogeneous appearances in different cross-sectional views which makes it hard to perform per-nodule examination. Sonographers usually discriminate different nodules by examining the nodule features and the surrounding structures like gland and duct, which is cumbersome and time-consuming. To address this problem, we collected hundreds of breast ultrasound videos and built a nodule reidentification system that consists of two parts: an extractor based on the deep learning model that can extract feature vectors from the input video clips and a real-time clustering algorithm that automatically groups feature vectors by nodules. The system obtains satisfactory results and exhibits the capability to differentiate ultrasound videos. As far as we know, it's the first attempt to apply re-identification technique in the ultrasonic field. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2310.06339v1-abstract-full').style.display = 'none'; document.getElementById('2310.06339v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 10 October, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2023. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2309.15415">arXiv:2309.15415</a> <span> [<a href="https://arxiv.org/pdf/2309.15415">pdf</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Signal Processing">eess.SP</span> </div> </div> <p class="title is-5 mathjax"> Formation Wing-Beat Modulation (FWM): A Tool for Quantifying Bird Flocks Using Radar Micro-Doppler Signals </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&query=Gong%2C+J">Jiangkun Gong</a>, <a href="/search/eess?searchtype=author&query=Yan%2C+J">Jun Yan</a>, <a href="/search/eess?searchtype=author&query=Kong%2C+D">Deyong Kong</a>, <a href="/search/eess?searchtype=author&query=Chen%2C+R">Ruizhi Chen</a>, <a href="/search/eess?searchtype=author&query=Li%2C+D">Deren Li</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2309.15415v1-abstract-short" style="display: inline;"> Radar echoes from bird flocks contain modulation signals, which we find are produced by the flapping gaits of birds in the flock, resulting in a group of spectral peaks with similar amplitudes spaced at a specific interval. We call this the formation wing-beat modulation (FWM) effect. FWM signals are micro-Doppler modulated by flapping wings and are related to the bird number, wing-beat frequency,… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2309.15415v1-abstract-full').style.display = 'inline'; document.getElementById('2309.15415v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2309.15415v1-abstract-full" style="display: none;"> Radar echoes from bird flocks contain modulation signals, which we find are produced by the flapping gaits of birds in the flock, resulting in a group of spectral peaks with similar amplitudes spaced at a specific interval. We call this the formation wing-beat modulation (FWM) effect. FWM signals are micro-Doppler modulated by flapping wings and are related to the bird number, wing-beat frequency, and flight phasing strategy. Our X-band radar data show that FWM signals exist in radar signals of a seagull flock, providing tools for quantifying the bird number and estimating the mean wingbeat rate of birds. This new finding could aid in research on the quantification of bird migration numbers and estimation of bird flight behavior in radar ornithology and aero-ecology. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2309.15415v1-abstract-full').style.display = 'none'; document.getElementById('2309.15415v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 27 September, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> September 2023. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2307.16430">arXiv:2307.16430</a> <span> [<a href="https://arxiv.org/pdf/2307.16430">pdf</a>, <a href="https://arxiv.org/format/2307.16430">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Sound">cs.SD</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Audio and Speech Processing">eess.AS</span> </div> </div> <p class="title is-5 mathjax"> VITS2: Improving Quality and Efficiency of Single-Stage Text-to-Speech with Adversarial Learning and Architecture Design </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&query=Kong%2C+J">Jungil Kong</a>, <a href="/search/eess?searchtype=author&query=Park%2C+J">Jihoon Park</a>, <a href="/search/eess?searchtype=author&query=Kim%2C+B">Beomjeong Kim</a>, <a href="/search/eess?searchtype=author&query=Kim%2C+J">Jeongmin Kim</a>, <a href="/search/eess?searchtype=author&query=Kong%2C+D">Dohee Kong</a>, <a href="/search/eess?searchtype=author&query=Kim%2C+S">Sangjin Kim</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2307.16430v1-abstract-short" style="display: inline;"> Single-stage text-to-speech models have been actively studied recently, and their results have outperformed two-stage pipeline systems. Although the previous single-stage model has made great progress, there is room for improvement in terms of its intermittent unnaturalness, computational efficiency, and strong dependence on phoneme conversion. In this work, we introduce VITS2, a single-stage text… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2307.16430v1-abstract-full').style.display = 'inline'; document.getElementById('2307.16430v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2307.16430v1-abstract-full" style="display: none;"> Single-stage text-to-speech models have been actively studied recently, and their results have outperformed two-stage pipeline systems. Although the previous single-stage model has made great progress, there is room for improvement in terms of its intermittent unnaturalness, computational efficiency, and strong dependence on phoneme conversion. In this work, we introduce VITS2, a single-stage text-to-speech model that efficiently synthesizes a more natural speech by improving several aspects of the previous work. We propose improved structures and training mechanisms and present that the proposed methods are effective in improving naturalness, similarity of speech characteristics in a multi-speaker model, and efficiency of training and inference. Furthermore, we demonstrate that the strong dependence on phoneme conversion in previous works can be significantly reduced with our method, which allows a fully end-to-end single-stage approach. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2307.16430v1-abstract-full').style.display = 'none'; document.getElementById('2307.16430v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 31 July, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> July 2023. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Interspeech 2023</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2307.10326">arXiv:2307.10326</a> <span> [<a href="https://arxiv.org/pdf/2307.10326">pdf</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Signal Processing">eess.SP</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Systems and Control">eess.SY</span> </div> </div> <p class="title is-5 mathjax"> Introduction to Drone Detection Radar with Emphasis on Automatic Target Recognition (ATR) technology </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&query=Gong%2C+J">Jiangkun Gong</a>, <a href="/search/eess?searchtype=author&query=Yan%2C+J">Jun Yan</a>, <a href="/search/eess?searchtype=author&query=Kong%2C+D">Deyong Kong</a>, <a href="/search/eess?searchtype=author&query=Li%2C+D">Deren Li</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2307.10326v1-abstract-short" style="display: inline;"> This paper discusses the challenges of detecting and categorizing small drones with radar automatic target recognition (ATR) technology. The authors suggest integrating ATR capabilities into drone detection radar systems to improve performance and manage emerging threats. The study focuses primarily on drones in Group 1 and 2. The paper highlights the need to consider kinetic features and signal s… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2307.10326v1-abstract-full').style.display = 'inline'; document.getElementById('2307.10326v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2307.10326v1-abstract-full" style="display: none;"> This paper discusses the challenges of detecting and categorizing small drones with radar automatic target recognition (ATR) technology. The authors suggest integrating ATR capabilities into drone detection radar systems to improve performance and manage emerging threats. The study focuses primarily on drones in Group 1 and 2. The paper highlights the need to consider kinetic features and signal signatures, such as micro-Doppler, in ATR techniques to efficiently recognize small drones. The authors also present a comprehensive drone detection radar system design that balances detection and tracking requirements, incorporating parameter adjustment based on scattering region theory. They offer an example of a performance improvement achieved using feedback and situational awareness mechanisms with the integrated ATR capabilities. Furthermore, the paper examines challenges related to one-way attack drones and explores the potential of cognitive radar as a solution. The integration of ATR capabilities transforms a 3D radar system into a 4D radar system, resulting in improved drone detection performance. These advancements are useful in military, civilian, and commercial applications, and ongoing research and development efforts are essential to keep radar systems effective and ready to detect, track, and respond to emerging threats. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2307.10326v1-abstract-full').style.display = 'none'; document.getElementById('2307.10326v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 19 July, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> July 2023. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">17 pages, 14 figures, submitted to a journal and being under review</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2211.03885">arXiv:2211.03885</a> <span> [<a href="https://arxiv.org/pdf/2211.03885">pdf</a>, <a href="https://arxiv.org/format/2211.03885">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Image and Video Processing">eess.IV</span> </div> </div> <p class="title is-5 mathjax"> Learned Smartphone ISP on Mobile GPUs with Deep Learning, Mobile AI & AIM 2022 Challenge: Report </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&query=Ignatov%2C+A">Andrey Ignatov</a>, <a href="/search/eess?searchtype=author&query=Timofte%2C+R">Radu Timofte</a>, <a href="/search/eess?searchtype=author&query=Liu%2C+S">Shuai Liu</a>, <a href="/search/eess?searchtype=author&query=Feng%2C+C">Chaoyu Feng</a>, <a href="/search/eess?searchtype=author&query=Bai%2C+F">Furui Bai</a>, <a href="/search/eess?searchtype=author&query=Wang%2C+X">Xiaotao Wang</a>, <a href="/search/eess?searchtype=author&query=Lei%2C+L">Lei Lei</a>, <a href="/search/eess?searchtype=author&query=Yi%2C+Z">Ziyao Yi</a>, <a href="/search/eess?searchtype=author&query=Xiang%2C+Y">Yan Xiang</a>, <a href="/search/eess?searchtype=author&query=Liu%2C+Z">Zibin Liu</a>, <a href="/search/eess?searchtype=author&query=Li%2C+S">Shaoqing Li</a>, <a href="/search/eess?searchtype=author&query=Shi%2C+K">Keming Shi</a>, <a href="/search/eess?searchtype=author&query=Kong%2C+D">Dehui Kong</a>, <a href="/search/eess?searchtype=author&query=Xu%2C+K">Ke Xu</a>, <a href="/search/eess?searchtype=author&query=Kwon%2C+M">Minsu Kwon</a>, <a href="/search/eess?searchtype=author&query=Wu%2C+Y">Yaqi Wu</a>, <a href="/search/eess?searchtype=author&query=Zheng%2C+J">Jiesi Zheng</a>, <a href="/search/eess?searchtype=author&query=Fan%2C+Z">Zhihao Fan</a>, <a href="/search/eess?searchtype=author&query=Wu%2C+X">Xun Wu</a>, <a href="/search/eess?searchtype=author&query=Zhang%2C+F">Feng Zhang</a>, <a href="/search/eess?searchtype=author&query=No%2C+A">Albert No</a>, <a href="/search/eess?searchtype=author&query=Cho%2C+M">Minhyeok Cho</a>, <a href="/search/eess?searchtype=author&query=Chen%2C+Z">Zewen Chen</a>, <a href="/search/eess?searchtype=author&query=Zhang%2C+X">Xiaze Zhang</a>, <a href="/search/eess?searchtype=author&query=Li%2C+R">Ran Li</a> , et al. (13 additional authors not shown) </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2211.03885v1-abstract-short" style="display: inline;"> The role of mobile cameras increased dramatically over the past few years, leading to more and more research in automatic image quality enhancement and RAW photo processing. In this Mobile AI challenge, the target was to develop an efficient end-to-end AI-based image signal processing (ISP) pipeline replacing the standard mobile ISPs that can run on modern smartphone GPUs using TensorFlow Lite. Th… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2211.03885v1-abstract-full').style.display = 'inline'; document.getElementById('2211.03885v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2211.03885v1-abstract-full" style="display: none;"> The role of mobile cameras increased dramatically over the past few years, leading to more and more research in automatic image quality enhancement and RAW photo processing. In this Mobile AI challenge, the target was to develop an efficient end-to-end AI-based image signal processing (ISP) pipeline replacing the standard mobile ISPs that can run on modern smartphone GPUs using TensorFlow Lite. The participants were provided with a large-scale Fujifilm UltraISP dataset consisting of thousands of paired photos captured with a normal mobile camera sensor and a professional 102MP medium-format FujiFilm GFX100 camera. The runtime of the resulting models was evaluated on the Snapdragon's 8 Gen 1 GPU that provides excellent acceleration results for the majority of common deep learning ops. The proposed solutions are compatible with all recent mobile GPUs, being able to process Full HD photos in less than 20-50 milliseconds while achieving high fidelity results. A detailed description of all models developed in this challenge is provided in this paper. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2211.03885v1-abstract-full').style.display = 'none'; document.getElementById('2211.03885v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 7 November, 2022; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> November 2022. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2110.10403">arXiv:2110.10403</a> <span> [<a href="https://arxiv.org/pdf/2110.10403">pdf</a>, <a href="https://arxiv.org/format/2110.10403">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Image and Video Processing">eess.IV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> AFTer-UNet: Axial Fusion Transformer UNet for Medical Image Segmentation </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&query=Yan%2C+X">Xiangyi Yan</a>, <a href="/search/eess?searchtype=author&query=Tang%2C+H">Hao Tang</a>, <a href="/search/eess?searchtype=author&query=Sun%2C+S">Shanlin Sun</a>, <a href="/search/eess?searchtype=author&query=Ma%2C+H">Haoyu Ma</a>, <a href="/search/eess?searchtype=author&query=Kong%2C+D">Deying Kong</a>, <a href="/search/eess?searchtype=author&query=Xie%2C+X">Xiaohui Xie</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2110.10403v1-abstract-short" style="display: inline;"> Recent advances in transformer-based models have drawn attention to exploring these techniques in medical image segmentation, especially in conjunction with the U-Net model (or its variants), which has shown great success in medical image segmentation, under both 2D and 3D settings. Current 2D based methods either directly replace convolutional layers with pure transformers or consider a transform… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2110.10403v1-abstract-full').style.display = 'inline'; document.getElementById('2110.10403v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2110.10403v1-abstract-full" style="display: none;"> Recent advances in transformer-based models have drawn attention to exploring these techniques in medical image segmentation, especially in conjunction with the U-Net model (or its variants), which has shown great success in medical image segmentation, under both 2D and 3D settings. Current 2D based methods either directly replace convolutional layers with pure transformers or consider a transformer as an additional intermediate encoder between the encoder and decoder of U-Net. However, these approaches only consider the attention encoding within one single slice and do not utilize the axial-axis information naturally provided by a 3D volume. In the 3D setting, convolution on volumetric data and transformers both consume large GPU memory. One has to either downsample the image or use cropped local patches to reduce GPU memory usage, which limits its performance. In this paper, we propose Axial Fusion Transformer UNet (AFTer-UNet), which takes both advantages of convolutional layers' capability of extracting detailed features and transformers' strength on long sequence modeling. It considers both intra-slice and inter-slice long-range cues to guide the segmentation. Meanwhile, it has fewer parameters and takes less GPU memory to train than the previous transformer-based models. Extensive experiments on three multi-organ segmentation datasets demonstrate that our method outperforms current state-of-the-art methods. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2110.10403v1-abstract-full').style.display = 'none'; document.getElementById('2110.10403v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 20 October, 2021; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2021. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2009.10190">arXiv:2009.10190</a> <span> [<a href="https://arxiv.org/pdf/2009.10190">pdf</a>, <a href="https://arxiv.org/format/2009.10190">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Image and Video Processing">eess.IV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Tissues and Organs">q-bio.TO</span> </div> </div> <p class="title is-5 mathjax"> Federated Learning for Computational Pathology on Gigapixel Whole Slide Images </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&query=Lu%2C+M+Y">Ming Y. Lu</a>, <a href="/search/eess?searchtype=author&query=Kong%2C+D">Dehan Kong</a>, <a href="/search/eess?searchtype=author&query=Lipkova%2C+J">Jana Lipkova</a>, <a href="/search/eess?searchtype=author&query=Chen%2C+R+J">Richard J. Chen</a>, <a href="/search/eess?searchtype=author&query=Singh%2C+R">Rajendra Singh</a>, <a href="/search/eess?searchtype=author&query=Williamson%2C+D+F+K">Drew F. K. Williamson</a>, <a href="/search/eess?searchtype=author&query=Chen%2C+T+Y">Tiffany Y. Chen</a>, <a href="/search/eess?searchtype=author&query=Mahmood%2C+F">Faisal Mahmood</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2009.10190v2-abstract-short" style="display: inline;"> Deep Learning-based computational pathology algorithms have demonstrated profound ability to excel in a wide array of tasks that range from characterization of well known morphological phenotypes to predicting non-human-identifiable features from histology such as molecular alterations. However, the development of robust, adaptable, and accurate deep learning-based models often rely on the collect… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2009.10190v2-abstract-full').style.display = 'inline'; document.getElementById('2009.10190v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2009.10190v2-abstract-full" style="display: none;"> Deep Learning-based computational pathology algorithms have demonstrated profound ability to excel in a wide array of tasks that range from characterization of well known morphological phenotypes to predicting non-human-identifiable features from histology such as molecular alterations. However, the development of robust, adaptable, and accurate deep learning-based models often rely on the collection and time-costly curation large high-quality annotated training data that should ideally come from diverse sources and patient populations to cater for the heterogeneity that exists in such datasets. Multi-centric and collaborative integration of medical data across multiple institutions can naturally help overcome this challenge and boost the model performance but is limited by privacy concerns amongst other difficulties that may arise in the complex data sharing process as models scale towards using hundreds of thousands of gigapixel whole slide images. In this paper, we introduce privacy-preserving federated learning for gigapixel whole slide images in computational pathology using weakly-supervised attention multiple instance learning and differential privacy. We evaluated our approach on two different diagnostic problems using thousands of histology whole slide images with only slide-level labels. Additionally, we present a weakly-supervised learning framework for survival prediction and patient stratification from whole slide images and demonstrate its effectiveness in a federated setting. Our results show that using federated learning, we can effectively develop accurate weakly supervised deep learning models from distributed data silos without direct data sharing and its associated complexities, while also preserving differential privacy using randomized noise generation. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2009.10190v2-abstract-full').style.display = 'none'; document.getElementById('2009.10190v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 22 September, 2020; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 21 September, 2020; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> September 2020. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2007.14168">arXiv:2007.14168</a> <span> [<a href="https://arxiv.org/pdf/2007.14168">pdf</a>, <a href="https://arxiv.org/ps/2007.14168">ps</a>, <a href="https://arxiv.org/format/2007.14168">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Signal Processing">eess.SP</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Information Theory">cs.IT</span> </div> <div class="is-inline-block" style="margin-left: 0.5rem"> <div class="tags has-addons"> <span class="tag is-dark is-size-7">doi</span> <span class="tag is-light is-size-7"><a class="" href="https://doi.org/10.1007/s11432-020-3011-7">10.1007/s11432-020-3011-7 <i class="fa fa-external-link" aria-hidden="true"></i></a></span> </div> </div> </div> <p class="title is-5 mathjax"> MMSE Channel Estimation for Two-Port Demodulation Reference Signals in New Radio </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&query=Kong%2C+D">Dejin Kong</a>, <a href="/search/eess?searchtype=author&query=Xia%2C+X">Xiang-Gen Xia</a>, <a href="/search/eess?searchtype=author&query=Liu%2C+P">Pei Liu</a>, <a href="/search/eess?searchtype=author&query=Zhu%2C+Q">Qibiao Zhu</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2007.14168v1-abstract-short" style="display: inline;"> Two-port demodulation reference signals (DMRS) have been employed in new radio (NR) recently. In this paper, we firstly propose a minimum mean square error (MMSE) scheme with full priori knowledge (F-MMSE) to achieve the channel estimation of two-port DMRS in NR. When the two ports are assigned to different users, the full priori knowledge of two ports is not easy to be obtained for one user. Then… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2007.14168v1-abstract-full').style.display = 'inline'; document.getElementById('2007.14168v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2007.14168v1-abstract-full" style="display: none;"> Two-port demodulation reference signals (DMRS) have been employed in new radio (NR) recently. In this paper, we firstly propose a minimum mean square error (MMSE) scheme with full priori knowledge (F-MMSE) to achieve the channel estimation of two-port DMRS in NR. When the two ports are assigned to different users, the full priori knowledge of two ports is not easy to be obtained for one user. Then, we present a MMSE scheme with partial priori knowledge (P-MMSE). Finally, numerical results show that the proposed schemes achieve satisfactory channel estimation performance. Moreover, for both mean square error and bit error ratio metrics, the proposed schemes can achieve better performance compared with the classical discrete Fourier transform based channel estimation. Particularly, P-MMSE scheme delivers almost the same performance compared with F-MMSE scheme by a small amount of prior knowledge. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2007.14168v1-abstract-full').style.display = 'none'; document.getElementById('2007.14168v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 28 July, 2020; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> July 2020. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">16 pages, 9 figures, the simplified version of this draft is accepted by SCIENCE CHINA Information Sciences</span> </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Journal ref:</span> Science China Information Sciences, vol. 64, no. 6, Jun. 2021 </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2001.08869">arXiv:2001.08869</a> <span> [<a href="https://arxiv.org/pdf/2001.08869">pdf</a>, <a href="https://arxiv.org/format/2001.08869">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Image and Video Processing">eess.IV</span> </div> </div> <p class="title is-5 mathjax"> Nonparametric Structure Regularization Machine for 2D Hand Pose Estimation </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&query=Chen%2C+Y">Yifei Chen</a>, <a href="/search/eess?searchtype=author&query=Ma%2C+H">Haoyu Ma</a>, <a href="/search/eess?searchtype=author&query=Kong%2C+D">Deying Kong</a>, <a href="/search/eess?searchtype=author&query=Yan%2C+X">Xiangyi Yan</a>, <a href="/search/eess?searchtype=author&query=Wu%2C+J">Jianbao Wu</a>, <a href="/search/eess?searchtype=author&query=Fan%2C+W">Wei Fan</a>, <a href="/search/eess?searchtype=author&query=Xie%2C+X">Xiaohui Xie</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2001.08869v1-abstract-short" style="display: inline;"> Hand pose estimation is more challenging than body pose estimation due to severe articulation, self-occlusion and high dexterity of the hand. Current approaches often rely on a popular body pose algorithm, such as the Convolutional Pose Machine (CPM), to learn 2D keypoint features. These algorithms cannot adequately address the unique challenges of hand pose estimation, because they are trained so… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2001.08869v1-abstract-full').style.display = 'inline'; document.getElementById('2001.08869v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2001.08869v1-abstract-full" style="display: none;"> Hand pose estimation is more challenging than body pose estimation due to severe articulation, self-occlusion and high dexterity of the hand. Current approaches often rely on a popular body pose algorithm, such as the Convolutional Pose Machine (CPM), to learn 2D keypoint features. These algorithms cannot adequately address the unique challenges of hand pose estimation, because they are trained solely based on keypoint positions without seeking to explicitly model structural relationship between them. We propose a novel Nonparametric Structure Regularization Machine (NSRM) for 2D hand pose estimation, adopting a cascade multi-task architecture to learn hand structure and keypoint representations jointly. The structure learning is guided by synthetic hand mask representations, which are directly computed from keypoint positions, and is further strengthened by a novel probabilistic representation of hand limbs and an anatomically inspired composition strategy of mask synthesis. We conduct extensive studies on two public datasets - OneHand 10k and CMU Panoptic Hand. Experimental results demonstrate that explicitly enforcing structure learning consistently improves pose estimation accuracy of CPM baseline models, by 1.17% on the first dataset and 4.01% on the second one. The implementation and experiment code is freely available online. Our proposal of incorporating structural learning to hand pose estimation requires no additional training information, and can be a generic add-on module to other pose estimation models. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2001.08869v1-abstract-full').style.display = 'none'; document.getElementById('2001.08869v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 23 January, 2020; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> January 2020. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">The paper has be accepted and will be presented at 2020 IEEE Winter Conference on Applications of Computer Vision (WACV). The code is freely available at https://github.com/HowieMa/NSRMhand</span> </p> </li> </ol> <div class="is-hidden-tablet"> <!-- feedback for mobile only --> <span class="help" style="display: inline-block;"><a href="https://github.com/arXiv/arxiv-search/releases">Search v0.5.6 released 2020-02-24</a> </span> </div> </div> </main> <footer> <div class="columns is-desktop" role="navigation" aria-label="Secondary"> <!-- MetaColumn 1 --> <div class="column"> <div class="columns"> <div class="column"> <ul class="nav-spaced"> <li><a href="https://info.arxiv.org/about">About</a></li> <li><a href="https://info.arxiv.org/help">Help</a></li> </ul> </div> <div class="column"> <ul class="nav-spaced"> <li> <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 512 512" class="icon filter-black" role="presentation"><title>contact arXiv</title><desc>Click here to contact arXiv</desc><path d="M502.3 190.8c3.9-3.1 9.7-.2 9.7 4.7V400c0 26.5-21.5 48-48 48H48c-26.5 0-48-21.5-48-48V195.6c0-5 5.7-7.8 9.7-4.7 22.4 17.4 52.1 39.5 154.1 113.6 21.1 15.4 56.7 47.8 92.2 47.6 35.7.3 72-32.8 92.3-47.6 102-74.1 131.6-96.3 154-113.7zM256 320c23.2.4 56.6-29.2 73.4-41.4 132.7-96.3 142.8-104.7 173.4-128.7 5.8-4.5 9.2-11.5 9.2-18.9v-19c0-26.5-21.5-48-48-48H48C21.5 64 0 85.5 0 112v19c0 7.4 3.4 14.3 9.2 18.9 30.6 23.9 40.7 32.4 173.4 128.7 16.8 12.2 50.2 41.8 73.4 41.4z"/></svg> <a href="https://info.arxiv.org/help/contact.html"> Contact</a> </li> <li> <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 512 512" class="icon filter-black" role="presentation"><title>subscribe to arXiv mailings</title><desc>Click here to subscribe</desc><path d="M476 3.2L12.5 270.6c-18.1 10.4-15.8 35.6 2.2 43.2L121 358.4l287.3-253.2c5.5-4.9 13.3 2.6 8.6 8.3L176 407v80.5c0 23.6 28.5 32.9 42.5 15.8L282 426l124.6 52.2c14.2 6 30.4-2.9 33-18.2l72-432C515 7.8 493.3-6.8 476 3.2z"/></svg> <a href="https://info.arxiv.org/help/subscribe"> Subscribe</a> </li> </ul> </div> </div> </div> <!-- end MetaColumn 1 --> <!-- MetaColumn 2 --> <div class="column"> <div class="columns"> <div class="column"> <ul class="nav-spaced"> <li><a href="https://info.arxiv.org/help/license/index.html">Copyright</a></li> <li><a href="https://info.arxiv.org/help/policies/privacy_policy.html">Privacy Policy</a></li> </ul> </div> <div class="column sorry-app-links"> <ul class="nav-spaced"> <li><a href="https://info.arxiv.org/help/web_accessibility.html">Web Accessibility Assistance</a></li> <li> <p class="help"> <a class="a11y-main-link" href="https://status.arxiv.org" target="_blank">arXiv Operational Status <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 256 512" class="icon filter-dark_grey" role="presentation"><path d="M224.3 273l-136 136c-9.4 9.4-24.6 9.4-33.9 0l-22.6-22.6c-9.4-9.4-9.4-24.6 0-33.9l96.4-96.4-96.4-96.4c-9.4-9.4-9.4-24.6 0-33.9L54.3 103c9.4-9.4 24.6-9.4 33.9 0l136 136c9.5 9.4 9.5 24.6.1 34z"/></svg></a><br> Get status notifications via <a class="is-link" href="https://subscribe.sorryapp.com/24846f03/email/new" target="_blank"><svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 512 512" class="icon filter-black" role="presentation"><path d="M502.3 190.8c3.9-3.1 9.7-.2 9.7 4.7V400c0 26.5-21.5 48-48 48H48c-26.5 0-48-21.5-48-48V195.6c0-5 5.7-7.8 9.7-4.7 22.4 17.4 52.1 39.5 154.1 113.6 21.1 15.4 56.7 47.8 92.2 47.6 35.7.3 72-32.8 92.3-47.6 102-74.1 131.6-96.3 154-113.7zM256 320c23.2.4 56.6-29.2 73.4-41.4 132.7-96.3 142.8-104.7 173.4-128.7 5.8-4.5 9.2-11.5 9.2-18.9v-19c0-26.5-21.5-48-48-48H48C21.5 64 0 85.5 0 112v19c0 7.4 3.4 14.3 9.2 18.9 30.6 23.9 40.7 32.4 173.4 128.7 16.8 12.2 50.2 41.8 73.4 41.4z"/></svg>email</a> or <a class="is-link" href="https://subscribe.sorryapp.com/24846f03/slack/new" target="_blank"><svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 448 512" class="icon filter-black" role="presentation"><path d="M94.12 315.1c0 25.9-21.16 47.06-47.06 47.06S0 341 0 315.1c0-25.9 21.16-47.06 47.06-47.06h47.06v47.06zm23.72 0c0-25.9 21.16-47.06 47.06-47.06s47.06 21.16 47.06 47.06v117.84c0 25.9-21.16 47.06-47.06 47.06s-47.06-21.16-47.06-47.06V315.1zm47.06-188.98c-25.9 0-47.06-21.16-47.06-47.06S139 32 164.9 32s47.06 21.16 47.06 47.06v47.06H164.9zm0 23.72c25.9 0 47.06 21.16 47.06 47.06s-21.16 47.06-47.06 47.06H47.06C21.16 243.96 0 222.8 0 196.9s21.16-47.06 47.06-47.06H164.9zm188.98 47.06c0-25.9 21.16-47.06 47.06-47.06 25.9 0 47.06 21.16 47.06 47.06s-21.16 47.06-47.06 47.06h-47.06V196.9zm-23.72 0c0 25.9-21.16 47.06-47.06 47.06-25.9 0-47.06-21.16-47.06-47.06V79.06c0-25.9 21.16-47.06 47.06-47.06 25.9 0 47.06 21.16 47.06 47.06V196.9zM283.1 385.88c25.9 0 47.06 21.16 47.06 47.06 0 25.9-21.16 47.06-47.06 47.06-25.9 0-47.06-21.16-47.06-47.06v-47.06h47.06zm0-23.72c-25.9 0-47.06-21.16-47.06-47.06 0-25.9 21.16-47.06 47.06-47.06h117.84c25.9 0 47.06 21.16 47.06 47.06 0 25.9-21.16 47.06-47.06 47.06H283.1z"/></svg>slack</a> </p> </li> </ul> </div> </div> </div> <!-- end MetaColumn 2 --> </div> </footer> <script src="https://static.arxiv.org/static/base/1.0.0a5/js/member_acknowledgement.js"></script> </body> </html>