Search | arXiv e-print repository
Showing 1–50 of 59 results for author: Kim, G

Searching in archive eess. Results are sorted by announcement date (newest first), 50 results per page; the remaining 9 results appear on page 2.
1. arXiv:2502.03502 (https://arxiv.org/abs/2502.03502) [eess.IV (Image and Video Processing); cs.AI (Artificial Intelligence); cs.GR (Graphics)]
   Title: DC-VSR: Spatially and Temporally Consistent Video Super-Resolution with Video Diffusion Prior
   Authors: Janghyeok Han, Gyujin Sim, Geonung Kim, Hyunseung Lee, Kyuha Choi, Youngseok Han, Sunghyun Cho
   Abstract: Video super-resolution (VSR) aims to reconstruct a high-resolution (HR) video from a low-resolution (LR) counterpart. Achieving successful VSR requires producing realistic HR details and ensuring both spatial and temporal consistency. To restore realistic details, diffusion-based VSR approaches have recently been proposed. However, the inherent randomness of diffusion, combined with their tile-based approach, often leads to spatio-temporal inconsistencies. In this paper, we propose DC-VSR, a novel VSR approach to produce spatially and temporally consistent VSR results with realistic textures. To achieve spatial and temporal consistency, DC-VSR adopts a novel Spatial Attention Propagation (SAP) scheme and a Temporal Attention Propagation (TAP) scheme that propagate information across spatio-temporal tiles based on the self-attention mechanism. To enhance high-frequency details, we also introduce Detail-Suppression Self-Attention Guidance (DSSAG), a novel diffusion guidance scheme. Comprehensive experiments demonstrate that DC-VSR achieves spatially and temporally consistent, high-quality VSR results, outperforming previous approaches.
   Submitted 5 February, 2025; originally announced February 2025.
   Comments: Equal contributions from first two authors

2. arXiv:2501.11225 (https://arxiv.org/abs/2501.11225) [cond-mat.mtrl-sci (Materials Science); cs.CV (Computer Vision and Pattern Recognition); eess.IV (Image and Video Processing)]
   Title: CNN-based TEM image denoising from first principles
   Authors: Jinwoong Chae, Sungwook Hong, Sungkyu Kim, Sungroh Yoon, Gunn Kim
   Abstract: Transmission electron microscope (TEM) images are often corrupted by noise, hindering their interpretation. To address this issue, we propose a deep learning-based approach using simulated images. Using density functional theory calculations with a set of pseudo-atomic orbital basis sets, we generate highly accurate ground truth images. We introduce four types of noise into these simulations to create realistic training datasets. Each type of noise is then used to train a separate convolutional neural network (CNN) model. Our results show that these CNNs are effective in reducing noise, even when applied to images with different noise levels than those used during training. However, we observe limitations in some cases, particularly in preserving the integrity of circular shapes and avoiding visible artifacts between image patches. To overcome these challenges, we propose alternative training strategies and future research directions. This study provides a valuable framework for training deep learning models for TEM image denoising.
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2501.11225v1-abstract-full').style.display = 'none'; document.getElementById('2501.11225v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 19 January, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> January 2025. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">10 pages and 4 figures</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2407.13564">arXiv:2407.13564</a> <span> [<a href="https://arxiv.org/pdf/2407.13564">pdf</a>, <a href="https://arxiv.org/format/2407.13564">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Optimization and Control">math.OC</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Systems and Control">eess.SY</span> </div> </div> <p class="title is-5 mathjax"> Convergence result for the gradient-push algorithm and its application to boost up the Push-DIging algorithm </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&query=Choi%2C+H">Hyogi Choi</a>, <a href="/search/eess?searchtype=author&query=Choi%2C+W">Woocheol Choi</a>, <a href="/search/eess?searchtype=author&query=Kim%2C+G">Gwangil Kim</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2407.13564v1-abstract-short" style="display: inline;"> The gradient-push algorithm is a fundamental algorithm for the distributed optimization problem \begin{equation} \min_{x \in \mathbb{R}^d} f(x) = \sum_{j=1}^n f_j (x), \end{equation} where each local cost $f_j$ is only known to agent $a_i$ for $1 \leq i \leq n$ and the agents are connected by a directed graph. In this paper, we obtain convergence results for the gradient-push algorithm with consta… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2407.13564v1-abstract-full').style.display = 'inline'; document.getElementById('2407.13564v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2407.13564v1-abstract-full" style="display: none;"> The gradient-push algorithm is a fundamental algorithm for the distributed optimization problem \begin{equation} \min_{x \in \mathbb{R}^d} f(x) = \sum_{j=1}^n f_j (x), \end{equation} where each local cost $f_j$ is only known to agent $a_i$ for $1 \leq i \leq n$ and the agents are connected by a directed graph. In this paper, we obtain convergence results for the gradient-push algorithm with constant stepsize whose range is sharp in terms the order of the smoothness constant $L>0$. Precisely, under the two settings: 1) Each local cost $f_i$ is strongly convex and $L$-smooth, 2) Each local cost $f_i$ is convex quadratic and $L$-smooth while the aggregate cost $f$ is strongly convex, we show that the gradient-push algorithm with stepsize $伪>0$ converges to an $O(伪)$-neighborhood of the minimizer of $f$ for a range $伪\in (0, c/L]$ with a value $c>0$ independent of $L>0$. 
As a benefit of the result, we suggest a hybrid algorithm that performs the gradient-push algorithm with a relatively large stepsize $伪>0$ for a number of iterations and then go over to perform the Push-DIGing algorithm. It is verified by a numerical test that the hybrid algorithm enhances the performance of the Push-DIGing algorithm significantly. The convergence results of the gradient-push algorithm are also supported by numerical tests. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2407.13564v1-abstract-full').style.display = 'none'; document.getElementById('2407.13564v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 18 July, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> July 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2407.11365">arXiv:2407.11365</a> <span> [<a href="https://arxiv.org/pdf/2407.11365">pdf</a>, <a href="https://arxiv.org/format/2407.11365">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Audio and Speech Processing">eess.AS</span> </div> </div> <p class="title is-5 mathjax"> Team HYU ASML ROBOVOX SP Cup 2024 System Description </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&query=Choi%2C+J">Jeong-Hwan Choi</a>, <a href="/search/eess?searchtype=author&query=Kim%2C+G">Gaeun Kim</a>, <a href="/search/eess?searchtype=author&query=Lee%2C+H">Hee-Jae Lee</a>, <a href="/search/eess?searchtype=author&query=Ahn%2C+S">Seyun Ahn</a>, <a href="/search/eess?searchtype=author&query=Kim%2C+H">Hyun-Soo Kim</a>, <a href="/search/eess?searchtype=author&query=Chang%2C+J">Joon-Hyuk Chang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2407.11365v1-abstract-short" style="display: inline;"> This report describes the submission of HYU ASML team to the IEEE Signal Processing Cup 2024 (SP Cup 2024). This challenge, titled "ROBOVOX: Far-Field Speaker Recognition by a Mobile Robot," focuses on speaker recognition using a mobile robot in noisy and reverberant conditions. Our solution combines the result of deep residual neural networks and time-delay neural network-based speaker embedding… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2407.11365v1-abstract-full').style.display = 'inline'; document.getElementById('2407.11365v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2407.11365v1-abstract-full" style="display: none;"> This report describes the submission of HYU ASML team to the IEEE Signal Processing Cup 2024 (SP Cup 2024). This challenge, titled "ROBOVOX: Far-Field Speaker Recognition by a Mobile Robot," focuses on speaker recognition using a mobile robot in noisy and reverberant conditions. Our solution combines the result of deep residual neural networks and time-delay neural network-based speaker embedding models. These models were trained on a diverse dataset that includes French speech. 
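The gradient-push iteration referenced above follows the standard push-sum pattern. The sketch below is a minimal NumPy illustration under assumed conditions (quadratic local costs, a directed ring, a column-stochastic mixing matrix B, illustrative variable names); it is not the paper's code, only the textbook form of the update with a constant stepsize alpha.

```python
import numpy as np

# Illustrative gradient-push sketch (not the paper's code): n agents minimize
# f(x) = sum_j f_j(x) with quadratic local costs f_j(x) = 0.5*||A_j x - b_j||^2
# over a directed graph encoded by a column-stochastic matrix B.
rng = np.random.default_rng(0)
n, d = 5, 3
A = [rng.standard_normal((4, d)) for _ in range(n)]
b = [rng.standard_normal(4) for _ in range(n)]

def grad(j, z):
    return A[j].T @ (A[j] @ z - b[j])

# Column-stochastic mixing matrix for a directed ring: each agent keeps half of
# its mass and pushes the other half to its successor.
B = np.zeros((n, n))
for j in range(n):
    B[j, j] = 0.5
    B[(j + 1) % n, j] = 0.5

x = np.zeros((n, d))          # agent states
y = np.ones(n)                # push-sum weights
alpha = 0.05                  # constant stepsize, alpha in (0, c/L]

for _ in range(2000):
    x = B @ x                 # push-sum mixing of states
    y = B @ y                 # push-sum mixing of weights
    z = x / y[:, None]        # de-biased local estimates z_i
    for j in range(n):
        x[j] -= alpha * grad(j, z[j])

# All agents end up in an O(alpha)-neighborhood of the global minimizer.
x_star = np.linalg.lstsq(np.vstack(A), np.concatenate(b), rcond=None)[0]
print(np.max(np.linalg.norm(z - x_star, axis=1)))
```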
4. arXiv:2407.11365 (https://arxiv.org/abs/2407.11365) [eess.AS (Audio and Speech Processing)]
   Title: Team HYU ASML ROBOVOX SP Cup 2024 System Description
   Authors: Jeong-Hwan Choi, Gaeun Kim, Hee-Jae Lee, Seyun Ahn, Hyun-Soo Kim, Joon-Hyuk Chang
   Abstract: This report describes the submission of the HYU ASML team to the IEEE Signal Processing Cup 2024 (SP Cup 2024). This challenge, titled "ROBOVOX: Far-Field Speaker Recognition by a Mobile Robot," focuses on speaker recognition using a mobile robot in noisy and reverberant conditions. Our solution combines the results of deep residual neural networks and time-delay neural network-based speaker embedding models. These models were trained on a diverse dataset that includes French speech. To account for the challenging evaluation environment characterized by high noise, reverberation, and short speech conditions, we focused on data augmentation and training speech duration for the speaker embedding model. Our submission achieved second place on the SP Cup 2024 public leaderboard, with a detection cost function of 0.5245 and an equal error rate of 6.46%.
   Submitted 16 July, 2024; originally announced July 2024.
   Comments: Technical report for IEEE Signal Processing Cup 2024, 9 pages

5. arXiv:2406.16994 (https://arxiv.org/abs/2406.16994) [eess.SP (Signal Processing); cs.AI (Artificial Intelligence)]
   Title: Quantum Multi-Agent Reinforcement Learning for Cooperative Mobile Access in Space-Air-Ground Integrated Networks
   Authors: Gyu Seon Kim, Yeryeong Cho, Jaehyun Chung, Soohyun Park, Soyi Jung, Zhu Han, Joongheon Kim
   Abstract: Achieving global space-air-ground integrated network (SAGIN) access only with CubeSats presents significant challenges, such as the access sustainability limitations in specific regions (e.g., polar regions) and the energy efficiency limitations in CubeSats. To tackle these problems, high-altitude long-endurance unmanned aerial vehicles (HALE-UAVs) can complement these CubeSat shortcomings by cooperatively providing global access sustainability and energy efficiency. However, as the number of CubeSats and HALE-UAVs increases, the scheduling dimension of each ground station (GS) increases. As a result, each GS can fall into the curse of dimensionality, and this challenge becomes one major hurdle for efficient global access. Therefore, this paper provides a quantum multi-agent reinforcement learning (QMARL)-based method for scheduling between GSs and CubeSats/HALE-UAVs in order to improve global access availability and energy efficiency. The main reason why the QMARL-based scheduler can be beneficial is that the algorithm facilitates a logarithmic-scale reduction in scheduling action dimensions, which is one critical feature as the number of CubeSats and HALE-UAVs expands. Additionally, individual GSs have different traffic demands depending on their locations and characteristics, thus it is essential to provide differentiated access services. The superiority of the proposed scheduler is validated through data-intensive experiments in realistic CubeSat/HALE-UAV settings.
   Submitted 24 June, 2024; originally announced June 2024.
   Comments: 17 pages, 22 figures

6. arXiv:2406.05270 (https://arxiv.org/abs/2406.05270) [physics.med-ph (Medical Physics); cs.CV (Computer Vision and Pattern Recognition); cs.LG (Machine Learning); eess.IV (Image and Video Processing)]
   Title: fastMRI Breast: A publicly available radial k-space dataset of breast dynamic contrast-enhanced MRI
   Authors: Eddy Solomon, Patricia M. Johnson, Zhengguo Tan, Radhika Tibrewala, Yvonne W. Lui, Florian Knoll, Linda Moy, Sungheon Gene Kim, Laura Heacock
   Abstract: This data curation work introduces the first large-scale dataset of radial k-space and DICOM data for breast DCE-MRI acquired in diagnostic breast MRI exams. Our dataset includes case-level labels indicating patient age, menopause status, lesion status (negative, benign, and malignant), and lesion type for each case. The public availability of this dataset and accompanying reconstruction code will support research and development of fast and quantitative breast image reconstruction and machine learning methods.
   Submitted 7 June, 2024; originally announced June 2024.

7. arXiv:2406.02562 (https://arxiv.org/abs/2406.02562) [eess.AS (Audio and Speech Processing); cs.AI (Artificial Intelligence); cs.CL (Computation and Language)]
   Title: Gated Low-rank Adaptation for personalized Code-Switching Automatic Speech Recognition on the low-spec devices
   Authors: Gwantae Kim, Bokyeung Lee, Donghyeon Kim, Hanseok Ko
   Abstract: In recent times, there has been a growing interest in utilizing personalized large models on low-spec devices, such as mobile and CPU-only devices. However, utilizing a personalized large model on-device is inefficient and sometimes limited due to computational cost. To tackle the problem, this paper presents a weights separation method to minimize on-device model weights using parameter-efficient fine-tuning methods. Moreover, some people speak multiple languages within an utterance, known as code-switching, so a personalized ASR model is necessary to address such cases. However, current multilingual speech recognition models are limited to recognizing a single language within each utterance. To tackle this problem, we propose code-switching speech recognition models that incorporate fine-tuned monolingual and multilingual speech recognition models. Additionally, we introduce a gated low-rank adaptation (GLoRA) for parameter-efficient fine-tuning with minimal performance degradation. Our experiments, conducted on Korean-English code-switching datasets, demonstrate that fine-tuning speech recognition models for code-switching surpasses the performance of traditional code-switching speech recognition models trained from scratch. Furthermore, GLoRA enhances parameter-efficient fine-tuning performance compared to conventional LoRA.
   Submitted 23 April, 2024; originally announced June 2024.
   Comments: Table 2 is revised
   Journal ref: ICASSP 2024 Workshop (HSCMA 2024) paper
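The entry above does not spell out the exact form of the gated adaptation. The sketch below is one plausible reading, assuming a standard LoRA update scaled by a learned sigmoid gate; the class name, gate form, and hyperparameters are assumptions for illustration, not the paper's implementation.

```python
import torch
import torch.nn as nn

class GatedLoRALinear(nn.Module):
    """Hypothetical gated low-rank adaptation of a frozen linear layer.

    Output = W x + sigmoid(g) * (alpha / r) * B A x, with a learned scalar gate g.
    This is one plausible reading of "gated LoRA"; the paper's exact
    formulation may differ.
    """

    def __init__(self, base: nn.Linear, r: int = 8, alpha: float = 16.0):
        super().__init__()
        self.base = base
        for p in self.base.parameters():
            p.requires_grad_(False)                     # frozen pretrained weights
        self.A = nn.Parameter(torch.randn(r, base.in_features) * 0.01)
        self.B = nn.Parameter(torch.zeros(base.out_features, r))  # zero init: no change at start
        self.gate = nn.Parameter(torch.zeros(1))        # sigmoid(0) = 0.5 at init
        self.scaling = alpha / r

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        lora_update = (x @ self.A.T) @ self.B.T
        return self.base(x) + torch.sigmoid(self.gate) * self.scaling * lora_update

# Usage: wrap a projection layer of a pretrained ASR model and fine-tune only
# the adapter parameters (A, B, gate) on code-switching data.
layer = GatedLoRALinear(nn.Linear(512, 512))
out = layer(torch.randn(4, 512))
print(out.shape)  # torch.Size([4, 512])
```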
8. arXiv:2405.19380 (https://arxiv.org/abs/2405.19380) [stat.ML (Machine Learning); cs.LG (Machine Learning); eess.SY (Systems and Control)]
   Title: Approximate Thompson Sampling for Learning Linear Quadratic Regulators with $O(\sqrt{T})$ Regret
   Authors: Yeoneung Kim, Gihun Kim, Insoon Yang
   Abstract: We propose an approximate Thompson sampling algorithm that learns linear quadratic regulators (LQR) with an improved Bayesian regret bound of $O(\sqrt{T})$. Our method leverages Langevin dynamics with a meticulously designed preconditioner as well as a simple excitation mechanism. We show that the excitation signal induces the minimum eigenvalue of the preconditioner to grow over time, thereby accelerating the approximate posterior sampling process. Moreover, we identify nontrivial concentration properties of the approximate posteriors generated by our algorithm. These properties enable us to bound the moments of the system state and attain an $O(\sqrt{T})$ regret bound without the unrealistic restrictive assumptions on parameter sets that are often used in the literature.
   Submitted 28 May, 2024; originally announced May 2024.
   Comments: 61 pages, 6 figures
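For readers outside this subfield, the Bayesian regret in the entry above is usually defined with respect to the standard stochastic LQR model; the block below states that common background setting (a generic formulation, not a restatement of the paper's exact assumptions).

```latex
% Standard LQR setting commonly used in this literature (background only).
\begin{align*}
  x_{t+1} &= A_\theta x_t + B_\theta u_t + w_t, \qquad w_t \sim \mathcal{N}(0, I),\\
  c_t &= x_t^\top Q x_t + u_t^\top R u_t, \qquad Q, R \succ 0,\\
  \mathrm{Regret}(T) &= \sum_{t=0}^{T-1} \big( c_t - J^*(\theta) \big),
\end{align*}
% Here $J^*(\theta)$ is the optimal average cost of the true system
% $\theta = (A_\theta, B_\theta)$, and the Bayesian regret is
% $\mathbb{E}_{\theta \sim \pi_0}[\mathrm{Regret}(T)]$ for a prior $\pi_0$;
% the algorithm in the entry above attains an $O(\sqrt{T})$ bound on this quantity.
```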
9. arXiv:2405.13762 (https://arxiv.org/abs/2405.13762) [cs.CV (Computer Vision and Pattern Recognition); cs.LG (Machine Learning); cs.MM (Multimedia); cs.SD (Sound); eess.AS (Audio and Speech Processing)]
   Title: A Versatile Diffusion Transformer with Mixture of Noise Levels for Audiovisual Generation
   Authors: Gwanghyun Kim, Alonso Martinez, Yu-Chuan Su, Brendan Jou, José Lezama, Agrim Gupta, Lijun Yu, Lu Jiang, Aren Jansen, Jacob Walker, Krishna Somandepalli
   Abstract: Training diffusion models for audiovisual sequences allows for a range of generation tasks by learning conditional distributions of various input-output combinations of the two modalities. Nevertheless, this strategy often requires training a separate model for each task, which is expensive. Here, we propose a novel training approach to effectively learn arbitrary conditional distributions in the audiovisual space. Our key contribution lies in how we parameterize the diffusion timestep in the forward diffusion process. Instead of the standard fixed diffusion timestep, we propose applying variable diffusion timesteps across the temporal dimension and across modalities of the inputs. This formulation offers flexibility to introduce variable noise levels for various portions of the input, hence the term mixture of noise levels. We propose a transformer-based audiovisual latent diffusion model and show that it can be trained in a task-agnostic fashion using our approach to enable a variety of audiovisual generation tasks at inference time. Experiments demonstrate the versatility of our method in tackling cross-modal and multimodal interpolation tasks in the audiovisual space. Notably, our proposed approach surpasses baselines in generating temporally and perceptually consistent samples conditioned on the input. Project page: avdit2024.github.io
   Submitted 22 May, 2024; originally announced May 2024.
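The central idea above, variable diffusion timesteps across time and across modalities rather than a single scalar timestep, can be illustrated in a few lines. The sketch below uses assumed tensor shapes, a standard linear beta schedule, and illustrative names; it is not the paper's code.

```python
import torch

# Illustrative "mixture of noise levels" sketch (shapes and names are assumptions):
# instead of one diffusion timestep per training example, sample an independent
# timestep for every (modality, temporal segment) pair and noise each chunk
# to its own level.
B, S, D = 2, 8, 64            # batch, temporal segments, latent dim per segment
T_max = 1000                  # number of diffusion steps

video = torch.randn(B, S, D)  # latent video segments
audio = torch.randn(B, S, D)  # latent audio segments

betas = torch.linspace(1e-4, 0.02, T_max)
alphas_cumprod = torch.cumprod(1.0 - betas, dim=0)

def noise_per_segment(x, t, alphas_cumprod):
    # q(x_t | x_0) applied with a separate timestep for each segment
    a = alphas_cumprod[t].unsqueeze(-1)                # (B, S, 1)
    return a.sqrt() * x + (1 - a).sqrt() * torch.randn_like(x)

# One timestep per modality and per temporal segment -> mixture of noise levels.
t_video = torch.randint(0, T_max, (B, S))
t_audio = torch.randint(0, T_max, (B, S))
noisy_video = noise_per_segment(video, t_video, alphas_cumprod)
noisy_audio = noise_per_segment(audio, t_audio, alphas_cumprod)
# A transformer denoiser would be conditioned on (t_video, t_audio); chunks given
# timestep 0 stay essentially clean, which is one way to express conditioning inputs.
```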
10. arXiv:2405.11807 (https://arxiv.org/abs/2405.11807) [cs.HC (Human-Computer Interaction); cs.RO (Robotics); eess.SY (Systems and Control)]
   Title: Dual-sided Peltier Elements for Rapid Thermal Feedback in Wearables
   Authors: Seongjun Kang, Gwangbin Kim, Seokhyun Hwang, Jeongju Park, Ahmed Elsharkawy, SeungJun Kim
   Abstract: This paper introduces a motor-driven Peltier device designed to deliver immediate thermal sensations within extended reality (XR) environments. The system incorporates eight motor-driven Peltier elements, facilitating swift transitions between warm and cool sensations by rotating preheated or cooled elements to opposite sides. A multi-layer structure, comprising aluminum and silicone layers, ensures user comfort and safety while maintaining optimal temperatures for thermal stimuli. Time-temperature characteristic analysis demonstrates the system's ability to provide warm and cool sensations efficiently, with a dual-sided lifetime of up to 206 seconds at a 2V input. Our system design is adaptable to various body parts and can be synchronized with corresponding visual stimuli to enhance the immersive sensation of virtual object interaction and information delivery.
   Submitted 20 May, 2024; originally announced May 2024.
   Comments: 3 pages, 4 figures, ICRA Wearable Workshop 2024 - 1st Workshop on Advancing Wearable Devices and Applications through Novel Design, Sensing, Actuation, and AI

11. arXiv:2401.08962 (https://arxiv.org/abs/2401.08962) [cs.HC (Human-Computer Interaction); cs.LG (Machine Learning); cs.SD (Sound); eess.AS (Audio and Speech Processing)]
   Title: DOO-RE: A dataset of ambient sensors in a meeting room for activity recognition
   Authors: Hyunju Kim, Geon Kim, Taehoon Lee, Kisoo Kim, Dongman Lee
   Abstract: With the advancement of IoT technology, recognizing user activities with machine learning methods is a promising way to provide various smart services to users. High-quality data with privacy protection is essential for deploying such services in the real world. Data streams from surrounding ambient sensors are well suited to the requirement. Existing ambient sensor datasets only support constrained private spaces, and datasets for public spaces have yet to be explored despite growing research interest in them. To meet this need, we build a dataset collected from a meeting room equipped with ambient sensors. The dataset, DOO-RE, includes data streams from various ambient sensor types such as Sound and Projector. Each sensor data stream is segmented into activity units, and multiple annotators provide activity labels through a cross-validation annotation process to improve annotation quality. We finally obtain 9 types of activities. To the best of our knowledge, DOO-RE is the first dataset to support the recognition of both single and group activities in a real meeting room with reliable annotations.
   Submitted 16 January, 2024; originally announced January 2024.

12. arXiv:2312.13313 (https://arxiv.org/abs/2312.13313) [eess.IV (Image and Video Processing); cs.CV (Computer Vision and Pattern Recognition)]
   Title: ParamISP: Learned Forward and Inverse ISPs using Camera Parameters
   Authors: Woohyeok Kim, Geonu Kim, Junyong Lee, Seungyong Lee, Seung-Hwan Baek, Sunghyun Cho
   Abstract: RAW images are rarely shared, mainly due to their excessive data size compared to their sRGB counterparts obtained by camera ISPs. Learning the forward and inverse processes of camera ISPs has been recently demonstrated, enabling physically meaningful RAW-level image processing on input sRGB images. However, existing learning-based ISP methods fail to handle the large variations in the ISP processes with respect to camera parameters such as ISO and exposure time, and have limitations when used for various applications. In this paper, we propose ParamISP, a learning-based method for forward and inverse conversion between sRGB and RAW images that adopts a novel neural-network module, dubbed ParamNet, to utilize camera parameters. Given the camera parameters provided in the EXIF data, ParamNet converts them into a feature vector to control the ISP networks. Extensive experiments demonstrate that ParamISP achieves superior RAW and sRGB reconstruction results compared to previous methods and can be effectively used for a variety of applications such as deblurring dataset synthesis, raw deblurring, HDR reconstruction, and camera-to-camera transfer.
   Submitted 14 April, 2024; v1 submitted 20 December, 2023; originally announced December 2023.
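The entry above describes ParamNet only at a high level: EXIF camera parameters are converted into a feature vector that conditions the ISP networks. The sketch below shows that general pattern; the layer sizes, the chosen parameters (ISO, exposure time, f-number), and the log-scale normalization are assumptions for illustration, not the paper's architecture.

```python
import torch
import torch.nn as nn

class CameraParamEmbedding(nn.Module):
    """Illustrative ParamNet-style module (layer sizes and normalization are
    assumptions): maps EXIF-style camera parameters to a conditioning vector
    that could modulate forward/inverse ISP networks."""

    def __init__(self, feat_dim: int = 128):
        super().__init__()
        self.mlp = nn.Sequential(
            nn.Linear(3, 64), nn.ReLU(),
            nn.Linear(64, feat_dim),
        )

    def forward(self, iso, exposure_time, f_number):
        # Log-scale normalization is a common choice for exposure-related
        # quantities; the paper may normalize differently.
        params = torch.stack([
            torch.log2(iso / 100.0),
            torch.log2(exposure_time),
            torch.log2(f_number),
        ], dim=-1)
        return self.mlp(params)

embed = CameraParamEmbedding()
cond = embed(torch.tensor([400.0]), torch.tensor([1 / 60]), torch.tensor([2.8]))
print(cond.shape)  # torch.Size([1, 128]); usable e.g. for feature-wise modulation
```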
arXiv:2310.12574 [eess.IV, cs.CV]
A reproducible 3D convolutional neural network with dual attention module (3D-DAM) for Alzheimer's disease classification
Authors: Gia Minh Hoang, Youngjoo Lee, Jae Gwan Kim
Abstract: Alzheimer's disease is one of the most common types of neurodegenerative disease, characterized by the accumulation of amyloid-beta plaques and tau tangles. Recently, deep learning approaches have shown promise in Alzheimer's disease diagnosis. In this study, we propose a reproducible model that utilizes a 3D convolutional neural network with a dual attention module for Alzheimer's disease classification. We trained the model on the ADNI database and verified the generalizability of our method on two independent datasets (AIBL and OASIS1). Our method achieved state-of-the-art classification performance, with an accuracy of 91.94% for MCI progression classification and 96.30% for Alzheimer's disease classification on the ADNI dataset. Furthermore, the model demonstrated good generalizability, achieving an accuracy of 86.37% on the AIBL dataset and 83.42% on the OASIS1 dataset. These results indicate that our proposed approach has competitive performance and generalizability compared to recent studies in the field.
Submitted 2 July, 2024; v1 submitted 19 October, 2023; originally announced October 2023.
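For readers unfamiliar with dual attention, the sketch below combines squeeze-and-excitation-style channel attention with a simple spatial attention over 3D (volumetric) feature maps. This is a generic pattern assumed for illustration; the paper's exact module design may differ.

```python
import torch
import torch.nn as nn

class DualAttention3D(nn.Module):
    """Generic channel + spatial attention for 3D feature maps (B, C, D, H, W)."""
    def __init__(self, channels, reduction=8):
        super().__init__()
        # Channel attention: global average pool -> bottleneck MLP -> sigmoid gates.
        self.channel_mlp = nn.Sequential(
            nn.Linear(channels, channels // reduction), nn.ReLU(),
            nn.Linear(channels // reduction, channels), nn.Sigmoid(),
        )
        # Spatial attention: 1x1x1 convolution collapsing channels to a single gate map.
        self.spatial_conv = nn.Sequential(nn.Conv3d(channels, 1, kernel_size=1), nn.Sigmoid())

    def forward(self, x):
        b, c = x.shape[:2]
        w_channel = self.channel_mlp(x.mean(dim=(2, 3, 4))).view(b, c, 1, 1, 1)
        x = x * w_channel                    # re-weight channels
        w_spatial = self.spatial_conv(x)     # (B, 1, D, H, W)
        return x * w_spatial                 # re-weight voxels

# Usage on a small MRI-like feature volume.
att = DualAttention3D(channels=32)
out = att(torch.randn(1, 32, 16, 16, 16))
```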
arXiv:2307.07409 [cs.CL, cs.AI, eess.IV]
KU-DMIS-MSRA at RadSum23: Pre-trained Vision-Language Model for Radiology Report Summarization
Authors: Gangwoo Kim, Hajung Kim, Lei Ji, Seongsu Bae, Chanhwi Kim, Mujeen Sung, Hyunjae Kim, Kun Yan, Eric Chang, Jaewoo Kang
Abstract: In this paper, we introduce CheXOFA, a new pre-trained vision-language model (VLM) for the chest X-ray domain. Our model is initially pre-trained on various multimodal datasets within the general domain before being transferred to the chest X-ray domain. Following a prominent VLM, we unify various domain-specific tasks into a simple sequence-to-sequence schema. It enables the model to effectively learn the required knowledge and skills from limited resources in the domain. Demonstrating superior performance on the benchmark datasets provided by the BioNLP shared task, our model benefits from its training across multiple tasks and domains. With subtle techniques including ensemble and factual calibration, our system achieves first place on the RadSum23 leaderboard for the hidden test set.
Submitted 10 July, 2023; originally announced July 2023.
Comments: Published at BioNLP workshop @ ACL 2023
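As a toy illustration of casting heterogeneous tasks into one sequence-to-sequence schema, the snippet below renders both a report-summarization example and a classification example as plain (source, target) text pairs. The prompt wording and field names are invented for illustration and are not taken from CheXOFA.

```python
def to_seq2seq(task, example):
    """Render different chest X-ray tasks as (source_text, target_text) pairs."""
    if task == "summarization":
        src = "summarize findings: " + example["findings"]
        tgt = example["impression"]
    elif task == "classification":
        src = "classify report: " + example["report"]
        tgt = ", ".join(example["labels"])
    else:
        raise ValueError(f"unknown task: {task}")
    return src, tgt

# Usage: every task becomes text-to-text, so one model can be trained on all of them.
print(to_seq2seq("summarization",
                 {"findings": "Low lung volumes. No focal consolidation.",
                  "impression": "No acute cardiopulmonary process."}))
print(to_seq2seq("classification",
                 {"report": "Mild cardiomegaly without edema.",
                  "labels": ["cardiomegaly"]}))
```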
arXiv:2306.13361 [physics.optics, cs.CV, eess.IV]
Neural 360$^\circ$ Structured Light with Learned Metasurfaces
Authors: Eunsue Choi, Gyeongtae Kim, Jooyeong Yun, Yujin Jeon, Junsuk Rho, Seung-Hwan Baek
Abstract: Structured light has proven instrumental in 3D imaging, LiDAR, and holographic light projection. Metasurfaces, comprised of sub-wavelength-sized nanostructures, facilitate 180$^\circ$ field-of-view (FoV) structured light, circumventing the restricted FoV inherent in traditional optics such as diffractive optical elements. However, extant metasurface-facilitated structured light exhibits sub-optimal performance in downstream tasks, due to heuristic pattern designs such as periodic dots that do not consider the objectives of the end application. In this paper, we present neural 360$^\circ$ structured light, driven by learned metasurfaces. We propose a differentiable framework that encompasses a computationally efficient 180$^\circ$ wave propagation model and a task-specific reconstructor, and exploits both transmission and reflection channels of the metasurface. Leveraging a first-order optimizer within our differentiable framework, we optimize the metasurface design, thereby realizing neural 360$^\circ$ structured light. We have utilized neural 360$^\circ$ structured light for holographic light projection and 3D imaging. Specifically, we demonstrate the first 360$^\circ$ light projection of complex patterns, enabled by our propagation model, which can be computationally evaluated 50,000$\times$ faster than Rayleigh-Sommerfeld propagation. For 3D imaging, we improve depth-estimation accuracy by 5.09$\times$ in RMSE compared to heuristically designed structured light. Neural 360$^\circ$ structured light promises robust 360$^\circ$ imaging and display for robotics, extended-reality systems, and human-computer interactions.
Submitted 27 June, 2023; v1 submitted 23 June, 2023; originally announced June 2023.
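To give a flavor of design optimization through a differentiable propagation model, here is a heavily simplified sketch: a phase-only element is optimized with Adam so that a Fraunhofer (far-field, FFT-based) approximation of its diffraction pattern matches a target intensity. The FFT far-field model is only an assumed stand-in for the paper's 180$^\circ$ propagation model, and the dot-pattern target is invented for the demo.

```python
import torch

# Target far-field intensity: a hypothetical dot pattern (assumed for the demo).
target = torch.zeros(64, 64)
target[::8, ::8] = 1.0
target = target / target.sum()

phase = torch.zeros(64, 64, requires_grad=True)   # phase-only design variable
opt = torch.optim.Adam([phase], lr=0.05)          # first-order optimizer

for step in range(500):
    field = torch.exp(1j * phase)                 # unit-amplitude complex field
    far_field = torch.fft.fftshift(torch.fft.fft2(field))
    intensity = far_field.abs() ** 2
    intensity = intensity / intensity.sum()       # normalize total energy
    loss = ((intensity - target) ** 2).sum()
    opt.zero_grad()
    loss.backward()
    opt.step()
```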
arXiv:2306.04137 [cs.MA, eess.SY]
Multi-Agent Reinforcement Learning for Cooperative Air Transportation Services in City-Wide Autonomous Urban Air Mobility
Authors: Chanyoung Park, Gyu Seon Kim, Soohyun Park, Soyi Jung, Joongheon Kim
Abstract: The development of urban air mobility (UAM) is progressing rapidly, and efficient transportation management systems are increasingly needed to cope with multifaceted environmental uncertainties. Thus, this paper proposes a novel air transportation service management algorithm based on multi-agent deep reinforcement learning (MADRL) to address the challenges of multi-UAM cooperation. Specifically, the proposed algorithm is based on the communication network (CommNet) method and uses centralized training and distributed execution (CTDE) across multiple UAMs to provide efficient air transportation services to passengers collaboratively. Furthermore, this paper adopts actual vertiport maps and UAM specifications to construct realistic air transportation networks. Evaluations of the proposed algorithm in data-intensive simulations show that it outperforms existing approaches in terms of air transportation service quality. Moreover, no UAM is left with inferior performance, thanks to parameter sharing in CommNet and the centralized critic network in CTDE. Therefore, the results in this paper can provide a promising solution for autonomous air transportation management systems in city-wide urban areas.
Submitted 7 June, 2023; originally announced June 2023.
Comments: 15 pages, 14 figures
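The core of CommNet-style cooperation with parameter sharing can be illustrated in a few lines: every agent runs the same network, and each agent's hidden state is updated using the mean of the other agents' hidden states as a communication vector. The layer sizes below are arbitrary, and the sketch omits the centralized critic used for CTDE.

```python
import torch
import torch.nn as nn

class CommNetLayer(nn.Module):
    """One CommNet-style communication step, shared by all agents."""
    def __init__(self, hidden_dim):
        super().__init__()
        self.f_hidden = nn.Linear(hidden_dim, hidden_dim)
        self.f_comm = nn.Linear(hidden_dim, hidden_dim)

    def forward(self, h):
        # h: (num_agents, hidden_dim). Each agent receives the mean of the others.
        n = h.size(0)
        comm = (h.sum(dim=0, keepdim=True) - h) / max(n - 1, 1)
        return torch.tanh(self.f_hidden(h) + self.f_comm(comm))

# Usage: 4 UAM agents with shared parameters exchange information once.
layer = CommNetLayer(hidden_dim=32)
hidden = torch.randn(4, 32)
hidden = layer(hidden)
```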
arXiv:2306.00680 [cs.SD, cs.AI, eess.AS]
Encoder-decoder multimodal speaker change detection
Authors: Jee-weon Jung, Soonshin Seo, Hee-Soo Heo, Geonmin Kim, You Jin Kim, Young-ki Kwon, Minjae Lee, Bong-Jin Lee
Abstract: The task of speaker change detection (SCD), which detects points where speakers change in an input, is essential for several applications. Several studies have addressed the SCD task using audio inputs only and have shown limited performance. Recently, multimodal SCD (MMSCD) models, which utilise the text modality in addition to audio, have shown improved performance. In this study, the proposed model is built upon two main proposals: a novel mechanism for modality fusion and the adoption of an encoder-decoder architecture. Different from previous MMSCD works that extract speaker embeddings from extremely short audio segments aligned to a single word, we use a speaker embedding extracted from 1.5 s of audio. A transformer decoder layer further improves the performance of an encoder-only MMSCD model. The proposed model achieves state-of-the-art results among studies that report SCD performance and is also on par with recent work that combines SCD with automatic speech recognition via human transcription.
Submitted 1 June, 2023; originally announced June 2023.
Comments: 5 pages, accepted for presentation at INTERSPEECH 2023
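A minimal sketch of the encoder-decoder shape described above: per-word text embeddings are fused with speaker embeddings (e.g., ones extracted from 1.5 s windows) by concatenation, encoded, and then refined by a transformer decoder layer before per-word change/no-change classification. The dimensions and the concatenation-based fusion are illustrative assumptions, not the paper's exact design.

```python
import torch
import torch.nn as nn

class ToyMMSCD(nn.Module):
    def __init__(self, text_dim=128, spk_dim=192, d_model=256):
        super().__init__()
        self.fuse = nn.Linear(text_dim + spk_dim, d_model)    # simple concat fusion
        self.encoder = nn.TransformerEncoderLayer(d_model, nhead=4, batch_first=True)
        self.decoder = nn.TransformerDecoderLayer(d_model, nhead=4, batch_first=True)
        self.head = nn.Linear(d_model, 2)                     # change / no change per word

    def forward(self, text_emb, spk_emb):
        # text_emb: (B, T, text_dim); spk_emb: (B, T, spk_dim), one per word.
        x = self.fuse(torch.cat([text_emb, spk_emb], dim=-1))
        memory = self.encoder(x)
        decoded = self.decoder(x, memory)   # decoder layer attends over encoder outputs
        return self.head(decoded)

model = ToyMMSCD()
logits = model(torch.randn(2, 10, 128), torch.randn(2, 10, 192))   # -> (2, 10, 2)
```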
arXiv:2210.08997 [cs.CV, cs.LG, eess.IV]
AIM 2022 Challenge on Instagram Filter Removal: Methods and Results
Authors: Furkan Kınlı, Sami Menteş, Barış Özcan, Furkan Kıraç, Radu Timofte, Yi Zuo, Zitao Wang, Xiaowen Zhang, Yu Zhu, Chenghua Li, Cong Leng, Jian Cheng, Shuai Liu, Chaoyu Feng, Furui Bai, Xiaotao Wang, Lei Lei, Tianzhi Ma, Zihan Gao, Wenxin He, Woon-Ha Yeo, Wang-Taek Oh, Young-Il Kim, Han-Cheol Ryu, Gang He, et al. (8 additional authors not shown)
Abstract: This paper introduces the methods and the results of the AIM 2022 challenge on Instagram Filter Removal. Social media filters transform images by consecutive non-linear operations, and the feature maps of the original content may be interpolated into a different domain. This reduces the overall performance of recent deep learning strategies. The main goal of this challenge is to produce realistic and visually plausible images in which the impact of the applied filters is mitigated while the content is preserved. The proposed solutions are ranked in terms of the PSNR value with respect to the original images. There are two prior studies on this task that serve as the baseline, and a total of 9 teams competed in the final phase of the challenge. The comparison of the qualitative results of the proposed solutions and the benchmark for the challenge are presented in this report.
Submitted 17 October, 2022; originally announced October 2022.
Comments: 14 pages, 9 figures, Challenge report of AIM 2022 Instagram Filter Removal Challenge in conjunction with ECCV 2022
arXiv:2207.01520 [eess.IV, cs.CV]
Adaptive GLCM sampling for transformer-based COVID-19 detection on CT
Authors: Okchul Jung, Dong Un Kang, Gwanghyun Kim, Se Young Chun
Abstract: The world has suffered from COVID-19 (SARS-CoV-2) for the last two years, causing much damage and change in people's daily lives. Thus, automated detection of COVID-19 utilizing deep learning on chest computed tomography (CT) scans has become promising, as it supports efficient and correct diagnosis. Recently, a transformer-based COVID-19 detection method on CT was proposed to utilize the 3D information in CT volumes. However, its sampling method for selecting slices is not optimal. To leverage the rich 3D information in CT volumes, we propose transformer-based COVID-19 detection with a novel data curation and adaptive sampling method using gray level co-occurrence matrices (GLCM). To train the model, which consists of a CNN layer followed by a transformer architecture, we first performed data curation based on lung segmentation and used the entropy of the GLCM of every slice in a CT volume to select the slices that are important for the prediction. The experimental results show that the proposed method improves the detection performance by a large margin without requiring difficult modifications to the model.
Submitted 4 July, 2022; originally announced July 2022.
Comments: 6 pages
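The slice-selection idea can be sketched in plain NumPy: build a gray level co-occurrence matrix for each slice, compute its entropy, and keep the top-k slices. The quantization level, the single (1, 0) pixel offset, and the choice of k are assumptions for illustration.

```python
import numpy as np

def glcm_entropy(slice_2d, levels=32):
    """Entropy of a simple GLCM with a (1, 0) pixel offset (illustrative)."""
    s = slice_2d - slice_2d.min()
    denom = s.max() if s.max() > 0 else 1.0
    q = np.floor(s / denom * (levels - 1)).astype(int)
    glcm = np.zeros((levels, levels), dtype=np.float64)
    np.add.at(glcm, (q[:-1, :].ravel(), q[1:, :].ravel()), 1)   # count vertical neighbours
    p = glcm / glcm.sum()
    p = p[p > 0]
    return float(-(p * np.log2(p)).sum())

def select_slices(volume, k=16):
    """Pick the k slices of a (num_slices, H, W) volume with the highest GLCM entropy."""
    scores = np.array([glcm_entropy(s) for s in volume])
    return np.sort(np.argsort(scores)[-k:])

# Usage on a toy CT-like volume.
vol = np.random.rand(40, 64, 64)
print(select_slices(vol, k=8))
```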
arXiv:2205.12633 [cs.CV, eess.IV]
NTIRE 2022 Challenge on High Dynamic Range Imaging: Methods and Results
Authors: Eduardo Pérez-Pellitero, Sibi Catley-Chandar, Richard Shaw, Aleš Leonardis, Radu Timofte, Zexin Zhang, Cen Liu, Yunbo Peng, Yue Lin, Gaocheng Yu, Jin Zhang, Zhe Ma, Hongbin Wang, Xiangyu Chen, Xintao Wang, Haiwei Wu, Lin Liu, Chao Dong, Jiantao Zhou, Qingsen Yan, Song Zhang, Weiye Chen, Yuhang Liu, Zhen Zhang, Yanning Zhang, et al. (68 additional authors not shown)
Abstract: This paper reviews the challenge on constrained high dynamic range (HDR) imaging that was part of the New Trends in Image Restoration and Enhancement (NTIRE) workshop, held in conjunction with CVPR 2022. This manuscript focuses on the competition set-up, the datasets, the proposed methods, and their results. The challenge aims at estimating an HDR image from multiple respective low dynamic range (LDR) observations, which might suffer from under- or over-exposed regions and different sources of noise. The challenge is composed of two tracks with an emphasis on fidelity and complexity constraints: in Track 1, participants are asked to optimize objective fidelity scores while imposing a low-complexity constraint (i.e. solutions cannot exceed a given number of operations); in Track 2, participants are asked to minimize the complexity of their solutions while imposing a constraint on fidelity scores (i.e. solutions are required to obtain a higher fidelity score than the prescribed baseline). Both tracks use the same data and metrics: fidelity is measured by means of PSNR with respect to a ground-truth HDR image (computed both directly and with a canonical tonemapping operation), while complexity metrics include the number of Multiply-Accumulate (MAC) operations and runtime (in seconds).
Submitted 25 May, 2022; originally announced May 2022.
Comments: CVPR Workshops 2022. 15 pages, 21 figures, 2 tables
Journal ref: Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Workshops, 2022
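The two fidelity scores mentioned above can be reproduced in a few lines: plain PSNR on the linear HDR values, and PSNR after a canonical tonemapping. The mu-law curve below is a common choice for tonemapped HDR evaluation and is used here as an assumed stand-in for the challenge's exact operator.

```python
import numpy as np

def psnr(pred, target, max_val=1.0):
    mse = np.mean((pred - target) ** 2)
    return float(10 * np.log10(max_val ** 2 / mse))

def mu_law_tonemap(x, mu=5000.0):
    """Compress linear HDR values in [0, 1] with a mu-law curve (assumed tonemapper)."""
    return np.log(1 + mu * x) / np.log(1 + mu)

# Usage: direct PSNR and tonemapped PSNR against a ground-truth HDR image.
gt = np.random.rand(64, 64, 3)
est = np.clip(gt + 0.01 * np.random.randn(64, 64, 3), 0, 1)
print(psnr(est, gt), psnr(mu_law_tonemap(est), mu_law_tonemap(gt)))
```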
arXiv:2205.01304 [eess.AS, cs.SD]
Efficient dynamic filter for robust and low computational feature extraction
Authors: Donghyeon Kim, Gwantae Kim, Bokyeung Lee, Jeong-gi Kwak, David K. Han, Hanseok Ko
Abstract: Unseen noise signals that are not considered during model training are difficult to anticipate and lead to performance degradation. Various methods have been investigated to mitigate unseen noise. In our previous work, an Instance-level Dynamic Filter (IDF) and a Pixel Dynamic Filter (PDF) were proposed to extract noise-robust features. However, the performance of the dynamic filter may be degraded because simple feature pooling is used to reduce the computational cost in the IDF part. In this paper, we propose an efficient dynamic filter to enhance the performance of the dynamic filter. Instead of using the simple feature mean, we separate Time-Frequency (T-F) features into non-overlapping chunks, and separable convolutions are carried out for each feature direction (inter-chunk and intra-chunk). Additionally, we propose Dynamic Attention Pooling, which maps high-dimensional features to low-dimensional feature embeddings. These methods are applied to the IDF for keyword spotting and speaker verification tasks. We confirm that our proposed method performs better in unseen environments (unseen noise and unseen speakers) than state-of-the-art models.
Submitted 20 October, 2022; v1 submitted 3 May, 2022; originally announced May 2022.
Comments: Accepted to SLT 2022
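Attention-based pooling of this kind is easy to sketch: instead of averaging time-frequency features, learned scores weight each frame before summation, and a projection maps the result to a low-dimensional embedding. The dimensions and the single-head design below are illustrative assumptions, not the paper's exact Dynamic Attention Pooling.

```python
import torch
import torch.nn as nn

class AttentionPooling(nn.Module):
    """Weighted pooling over time: (B, T, D) -> (B, out_dim)."""
    def __init__(self, in_dim=256, out_dim=64):
        super().__init__()
        self.score = nn.Linear(in_dim, 1)       # one attention score per frame
        self.proj = nn.Linear(in_dim, out_dim)  # low-dimensional embedding

    def forward(self, x):
        w = torch.softmax(self.score(x), dim=1)   # (B, T, 1), sums to 1 over time
        pooled = (w * x).sum(dim=1)               # attention-weighted mean
        return self.proj(pooled)

# Usage: pool a sequence of T-F features into a compact embedding.
pool = AttentionPooling()
emb = pool(torch.randn(4, 100, 256))   # -> (4, 64)
```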
arXiv:2204.09573 [eess.IV, cs.CV]
Fetal Brain Tissue Annotation and Segmentation Challenge Results
Authors: Kelly Payette, Hongwei Li, Priscille de Dumast, Roxane Licandro, Hui Ji, Md Mahfuzur Rahman Siddiquee, Daguang Xu, Andriy Myronenko, Hao Liu, Yuchen Pei, Lisheng Wang, Ying Peng, Juanying Xie, Huiquan Zhang, Guiming Dong, Hao Fu, Guotai Wang, ZunHyan Rieu, Donghyeon Kim, Hyun Gi Kim, Davood Karimi, Ali Gholipour, Helena R. Torres, Bruno Oliveira, João L. Vilaça, et al. (33 additional authors not shown)
Abstract: In-utero fetal MRI is emerging as an important tool in the diagnosis and analysis of the developing human brain. Automatic segmentation of the developing fetal brain is a vital step in the quantitative analysis of prenatal neurodevelopment in both research and clinical contexts. However, manual segmentation of cerebral structures is time-consuming and prone to error and inter-observer variability. Therefore, we organized the Fetal Tissue Annotation (FeTA) Challenge in 2021 in order to encourage the development of automatic segmentation algorithms on an international level. The challenge utilized the FeTA Dataset, an open dataset of fetal brain MRI reconstructions segmented into seven different tissues (external cerebrospinal fluid, grey matter, white matter, ventricles, cerebellum, brainstem, deep grey matter). 20 international teams participated in this challenge, submitting a total of 21 algorithms for evaluation. In this paper, we provide a detailed analysis of the results from both a technical and a clinical perspective. All participants relied on deep learning methods, mainly U-Nets, with some variability in network architecture, optimization, and image pre- and post-processing. The majority of teams used existing medical imaging deep learning frameworks. The main differences between the submissions were the fine-tuning done during training and the specific pre- and post-processing steps performed. The challenge results showed that almost all submissions performed similarly. Four of the top five teams used ensemble learning methods. However, one team's algorithm performed significantly better than the other submissions, and it consisted of an asymmetrical U-Net network architecture. This paper provides a first-of-its-kind benchmark for future automatic multi-tissue segmentation algorithms for the developing human brain in utero.
Submitted 20 April, 2022; originally announced April 2022.
Comments: Results from FeTA Challenge 2021, held at MICCAI; Manuscript submitted
DOI: 10.1016/j.media.2023.102833
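Segmentation challenges of this kind are typically scored with the Dice coefficient per tissue class; a minimal multi-class implementation is shown below. The integer label encoding and the number of classes are assumptions for the sketch.

```python
import numpy as np

def dice_per_class(pred, target, num_classes):
    """Dice coefficient for each class of two integer label volumes of equal shape."""
    scores = []
    for c in range(num_classes):
        p, t = (pred == c), (target == c)
        denom = p.sum() + t.sum()
        scores.append(1.0 if denom == 0 else 2.0 * np.logical_and(p, t).sum() / denom)
    return scores

# Usage with 7 tissue classes plus background (8 labels in total).
pred = np.random.randint(0, 8, size=(32, 32, 32))
target = np.random.randint(0, 8, size=(32, 32, 32))
print(dice_per_class(pred, target, num_classes=8))
```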
arXiv:2202.06431 [eess.IV, cs.CV, cs.LG]
AI can evolve without labels: self-evolving vision transformer for chest X-ray diagnosis through knowledge distillation
Authors: Sangjoon Park, Gwanghyun Kim, Yujin Oh, Joon Beom Seo, Sang Min Lee, Jin Hwan Kim, Sungjun Moon, Jae-Kwang Lim, Chang Min Park, Jong Chul Ye
Abstract: Although deep learning-based computer-aided diagnosis systems have recently achieved expert-level performance, developing a robust deep learning model requires large, high-quality data with manual annotation, which is expensive to obtain. This poses the problem that the chest X-rays collected annually in hospitals cannot be used due to the lack of manual labeling by experts, especially in deprived areas. To address this, we present a novel deep learning framework that uses knowledge distillation through self-supervised learning and self-training, showing that the performance of a model trained with a small number of labels can be gradually improved with more unlabeled data. Experimental results show that the proposed framework maintains impressive robustness in a real-world environment and has general applicability to several diagnostic tasks such as tuberculosis, pneumothorax, and COVID-19. Notably, we demonstrate that our model performs even better than models trained with the same amount of labeled data. The proposed framework has great potential for medical imaging, where plenty of data is accumulated every year but ground-truth annotations are expensive to obtain.
Submitted 13 February, 2022; originally announced February 2022.
Comments: 24 pages
DOI: 10.1038/s41467-022-31514-x
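The self-training step described above can be sketched as follows: a frozen teacher produces soft pseudo-labels on unlabeled chest X-rays, and the student is trained to match them with a temperature-scaled KL divergence. The temperature value and the omission of the self-supervised pre-training stage are simplifications assumed for illustration.

```python
import torch
import torch.nn.functional as F

def distillation_loss(student_logits, teacher_logits, temperature=2.0):
    """KL divergence between temperature-softened teacher and student distributions."""
    t = temperature
    teacher_prob = F.softmax(teacher_logits / t, dim=-1)
    student_logprob = F.log_softmax(student_logits / t, dim=-1)
    return F.kl_div(student_logprob, teacher_prob, reduction="batchmean") * (t * t)

# Usage: pseudo-labels from a frozen teacher guide the student on unlabeled images.
teacher_logits = torch.randn(8, 3)             # e.g., normal / TB / pneumothorax (toy)
student_logits = torch.randn(8, 3, requires_grad=True)
loss = distillation_loss(student_logits, teacher_logits)
loss.backward()
```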
arXiv:2201.06735 [eess.SP]
AI Augmented Digital Metal Component
Authors: Eunhyeok Seo, Hyokyung Sung, Hayeol Kim, Taekyeong Kim, Sangeun Park, Min Sik Lee, Seung Ki Moon, Jung Gi Kim, Hayoung Chung, Seong-Kyum Choi, Ji-hun Yu, Kyung Tae Kim, Seong Jin Park, Namhun Kim, Im Doo Jung
Abstract: The aim of this work is to propose a new paradigm that imparts intelligence to metal parts through the fusion of metal additive manufacturing and artificial intelligence (AI). Our digital metal part classifies its status through real-time data processing with a convolutional neural network (CNN). The training data for the CNN are collected from a strain gauge embedded in metal parts by the laser powder bed fusion process. We implement this approach using additive manufacturing and demonstrate a self-cognitive metal part that recognizes partial screw loosening, malfunction, and external impacting objects. The results indicate that the metal part can recognize subtle changes among multiple fixation states under repetitive compression with 89.1% accuracy on test sets. The proposed strategy shows promising potential for contributing to hyper-connectivity in the next generation of digital-metal-based mechanical systems.
Submitted 17 January, 2022; originally announced January 2022.
Comments: 46 pages
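A status classifier of this kind can be prototyped as a small 1D CNN over windows of strain-gauge samples; the window length, channel counts, and the four example classes below are assumptions for the sketch, not the paper's configuration.

```python
import torch
import torch.nn as nn

class StrainCNN(nn.Module):
    """Classify fixed-length strain-gauge windows into part-status classes."""
    def __init__(self, num_classes=4, window=256):
        super().__init__()
        self.features = nn.Sequential(
            nn.Conv1d(1, 16, kernel_size=7, padding=3), nn.ReLU(), nn.MaxPool1d(4),
            nn.Conv1d(16, 32, kernel_size=5, padding=2), nn.ReLU(), nn.MaxPool1d(4),
        )
        self.classifier = nn.Linear(32 * (window // 16), num_classes)

    def forward(self, x):                       # x: (B, 1, window)
        h = self.features(x)
        return self.classifier(h.flatten(1))

# Usage: e.g. classes = {normal, loosened screw, malfunction, external impact}.
model = StrainCNN()
logits = model(torch.randn(8, 1, 256))          # -> (8, 4)
```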
Heinrich</a>, <a href="/search/eess?searchtype=author&query=Joshi%2C+S">Smriti Joshi</a>, <a href="/search/eess?searchtype=author&query=Kashtanova%2C+V">Victoriya Kashtanova</a>, <a href="/search/eess?searchtype=author&query=Kim%2C+H+G">Hyeon Gyu Kim</a>, <a href="/search/eess?searchtype=author&query=Kondo%2C+S">Satoshi Kondo</a>, <a href="/search/eess?searchtype=author&query=Kruse%2C+C+N">Christian N. Kruse</a>, <a href="/search/eess?searchtype=author&query=Lai-Yuen%2C+S+K">Susana K. Lai-Yuen</a> , et al. (15 additional authors not shown) </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2201.02831v3-abstract-short" style="display: inline;"> Domain Adaptation (DA) has recently raised strong interests in the medical imaging community. While a large variety of DA techniques has been proposed for image segmentation, most of these techniques have been validated either on private datasets or on small publicly available datasets. Moreover, these datasets mostly addressed single-class problems. To tackle these limitations, the Cross-Modality… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2201.02831v3-abstract-full').style.display = 'inline'; document.getElementById('2201.02831v3-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2201.02831v3-abstract-full" style="display: none;"> Domain Adaptation (DA) has recently raised strong interests in the medical imaging community. While a large variety of DA techniques has been proposed for image segmentation, most of these techniques have been validated either on private datasets or on small publicly available datasets. Moreover, these datasets mostly addressed single-class problems. To tackle these limitations, the Cross-Modality Domain Adaptation (crossMoDA) challenge was organised in conjunction with the 24th International Conference on Medical Image Computing and Computer Assisted Intervention (MICCAI 2021). CrossMoDA is the first large and multi-class benchmark for unsupervised cross-modality DA. The challenge's goal is to segment two key brain structures involved in the follow-up and treatment planning of vestibular schwannoma (VS): the VS and the cochleas. Currently, the diagnosis and surveillance in patients with VS are performed using contrast-enhanced T1 (ceT1) MRI. However, there is growing interest in using non-contrast sequences such as high-resolution T2 (hrT2) MRI. Therefore, we created an unsupervised cross-modality segmentation benchmark. The training set provides annotated ceT1 (N=105) and unpaired non-annotated hrT2 (N=105). The aim was to automatically perform unilateral VS and bilateral cochlea segmentation on hrT2 as provided in the testing set (N=137). A total of 16 teams submitted their algorithm for the evaluation phase. The level of performance reached by the top-performing teams is strikingly high (best median Dice - VS:88.4%; Cochleas:85.7%) and close to full supervision (median Dice - VS:92.5%; Cochleas:87.7%). All top-performing methods made use of an image-to-image translation approach to transform the source-domain images into pseudo-target-domain images. A segmentation network was then trained using these generated images and the manual annotations provided for the source image. 
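<p>The challenge results quoted above are per-structure Dice overlap scores for the vestibular schwannoma and cochlea labels. As a reference point, here is a minimal NumPy sketch of that metric; the label values, volume shapes, and random test data are assumptions for the example, not the challenge's evaluation code.</p> <pre><code class="language-python">
import numpy as np

def dice_score(pred, target, label):
    """Dice overlap for one structure label in two integer label maps."""
    p = (pred == label)
    t = (target == label)
    intersection = np.logical_and(p, t).sum()
    denom = p.sum() + t.sum()
    if denom == 0:
        return 1.0  # both masks empty: treat as perfect agreement
    return 2.0 * intersection / denom

# toy 3D label maps: 0 = background, 1 = vestibular schwannoma, 2 = cochlea
rng = np.random.default_rng(0)
gt = rng.integers(0, 3, size=(8, 64, 64))
seg = gt.copy()
seg[0] = rng.integers(0, 3, size=(64, 64))  # perturb one slice of the "prediction"

for name, label in [("VS", 1), ("cochlea", 2)]:
    print(name, round(dice_score(seg, gt, label), 3))
</code></pre>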
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2201.02831v3-abstract-full').style.display = 'none'; document.getElementById('2201.02831v3-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 14 December, 2022; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 8 January, 2022; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> January 2022. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">In Medical Image Analysis</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2111.04028">arXiv:2111.04028</a> <span> [<a href="https://arxiv.org/pdf/2111.04028">pdf</a>, <a href="https://arxiv.org/format/2111.04028">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Image and Video Processing">eess.IV</span> </div> </div> <p class="title is-5 mathjax"> Style Transfer with Target Feature Palette and Attention Coloring </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&query=Ha%2C+S">Suhyeon Ha</a>, <a href="/search/eess?searchtype=author&query=Kim%2C+G">Guisik Kim</a>, <a href="/search/eess?searchtype=author&query=Kwon%2C+J">Junseok Kwon</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2111.04028v1-abstract-short" style="display: inline;"> Style transfer has attracted a lot of attentions, as it can change a given image into one with splendid artistic styles while preserving the image structure. However, conventional approaches easily lose image details and tend to produce unpleasant artifacts during style transfer. In this paper, to solve these problems, a novel artistic stylization method with target feature palettes is proposed, w… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2111.04028v1-abstract-full').style.display = 'inline'; document.getElementById('2111.04028v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2111.04028v1-abstract-full" style="display: none;"> Style transfer has attracted a lot of attentions, as it can change a given image into one with splendid artistic styles while preserving the image structure. However, conventional approaches easily lose image details and tend to produce unpleasant artifacts during style transfer. In this paper, to solve these problems, a novel artistic stylization method with target feature palettes is proposed, which can transfer key features accurately. Specifically, our method contains two modules, namely feature palette composition (FPC) and attention coloring (AC) modules. The FPC module captures representative features based on K-means clustering and produces a feature target palette. The following AC module calculates attention maps between content and style images, and transfers colors and patterns based on the attention map and the target palette. 
These modules enable the proposed stylization to focus on key features and generate plausibly transferred images. Thus, the contributions of the proposed method are to propose a novel deep learning-based style transfer method and present target feature palette and attention coloring modules, and provide in-depth analysis and insight on the proposed method via exhaustive ablation study. Qualitative and quantitative results show that our stylized images exhibit state-of-the-art performance, with strength in preserving core structures and details of the content image. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2111.04028v1-abstract-full').style.display = 'none'; document.getElementById('2111.04028v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 7 November, 2021; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> November 2021. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2111.01338">arXiv:2111.01338</a> <span> [<a href="https://arxiv.org/pdf/2111.01338">pdf</a>, <a href="https://arxiv.org/format/2111.01338">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Image and Video Processing">eess.IV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> Federated Split Vision Transformer for COVID-19 CXR Diagnosis using Task-Agnostic Training </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&query=Park%2C+S">Sangjoon Park</a>, <a href="/search/eess?searchtype=author&query=Kim%2C+G">Gwanghyun Kim</a>, <a href="/search/eess?searchtype=author&query=Kim%2C+J">Jeongsol Kim</a>, <a href="/search/eess?searchtype=author&query=Kim%2C+B">Boah Kim</a>, <a href="/search/eess?searchtype=author&query=Ye%2C+J+C">Jong Chul Ye</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2111.01338v2-abstract-short" style="display: inline;"> Federated learning, which shares the weights of the neural network across clients, is gaining attention in the healthcare sector as it enables training on a large corpus of decentralized data while maintaining data privacy. For example, this enables neural network training for COVID-19 diagnosis on chest X-ray (CXR) images without collecting patient CXR data across multiple hospitals. Unfortunatel… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2111.01338v2-abstract-full').style.display = 'inline'; document.getElementById('2111.01338v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2111.01338v2-abstract-full" style="display: none;"> Federated learning, which shares the weights of the neural network across clients, is gaining attention in the healthcare sector as it enables training on a large corpus of decentralized data while maintaining data privacy. 
For example, this enables neural network training for COVID-19 diagnosis on chest X-ray (CXR) images without collecting patient CXR data across multiple hospitals. Unfortunately, the exchange of the weights quickly consumes the network bandwidth if highly expressive network architecture is employed. So-called split learning partially solves this problem by dividing a neural network into a client and a server part, so that the client part of the network takes up less extensive computation resources and bandwidth. However, it is not clear how to find the optimal split without sacrificing the overall network performance. To amalgamate these methods and thereby maximize their distinct strengths, here we show that the Vision Transformer, a recently developed deep learning architecture with straightforward decomposable configuration, is ideally suitable for split learning without sacrificing performance. Even under the non-independent and identically distributed data distribution which emulates a real collaboration between hospitals using CXR datasets from multiple sources, the proposed framework was able to attain performance comparable to data-centralized training. In addition, the proposed framework along with heterogeneous multi-task clients also improves individual task performances including the diagnosis of COVID-19, eliminating the need for sharing large weights with innumerable parameters. Our results affirm the suitability of Transformer for collaborative learning in medical imaging and pave the way forward for future real-world implementations. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2111.01338v2-abstract-full').style.display = 'none'; document.getElementById('2111.01338v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 3 November, 2021; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 1 November, 2021; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> November 2021. 
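<p>A minimal PyTorch sketch of the split-learning exchange described above, with the network cut after a client-side Transformer block so that only the intermediate features and their gradients cross the client-server boundary. The layer sizes, the cut point, and the toy data are assumptions for illustration, not the configuration used in the paper.</p> <pre><code class="language-python">
import torch
import torch.nn as nn

# client keeps the embedding and the first encoder block; server keeps the rest
client = nn.Sequential(
    nn.Linear(32, 64),
    nn.TransformerEncoderLayer(d_model=64, nhead=4, batch_first=True),
)
server = nn.Sequential(
    nn.TransformerEncoderLayer(d_model=64, nhead=4, batch_first=True),
    nn.Flatten(),
    nn.Linear(64 * 16, 2),
)
opt_c = torch.optim.Adam(client.parameters(), lr=1e-4)
opt_s = torch.optim.Adam(server.parameters(), lr=1e-4)
loss_fn = nn.CrossEntropyLoss()

x = torch.randn(8, 16, 32)      # batch of 8 inputs, each as 16 patch tokens
y = torch.randint(0, 2, (8,))   # toy binary labels

# client forward: only the "smashed" features cross the network boundary
smashed = client(x)
sent = smashed.detach().requires_grad_(True)

# server forward/backward on the received features
loss = loss_fn(server(sent), y)
opt_s.zero_grad()
loss.backward()
opt_s.step()

# the gradient at the cut is returned to the client, which finishes backprop
opt_c.zero_grad()
smashed.backward(sent.grad)
opt_c.step()
print(float(loss))
</code></pre>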
</p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Accepted for NeurIPS 2021</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2110.03326">arXiv:2110.03326</a> <span> [<a href="https://arxiv.org/pdf/2110.03326">pdf</a>, <a href="https://arxiv.org/format/2110.03326">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Sound">cs.SD</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Audio and Speech Processing">eess.AS</span> </div> </div> <p class="title is-5 mathjax"> Back from the future: bidirectional CTC decoding using future information in speech recognition </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&query=Jung%2C+N">Namkyu Jung</a>, <a href="/search/eess?searchtype=author&query=Kim%2C+G">Geonmin Kim</a>, <a href="/search/eess?searchtype=author&query=Kim%2C+H">Han-Gyu Kim</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2110.03326v1-abstract-short" style="display: inline;"> In this paper, we propose a simple but effective method to decode the output of a Connectionist Temporal Classification (CTC) model using a bi-directional neural language model. The bidirectional language model uses the future as well as the past information in order to predict the next output in the sequence. The proposed method based on bi-directional beam search takes advantage of the CTC greedy deco… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2110.03326v1-abstract-full').style.display = 'inline'; document.getElementById('2110.03326v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2110.03326v1-abstract-full" style="display: none;"> In this paper, we propose a simple but effective method to decode the output of a Connectionist Temporal Classification (CTC) model using a bi-directional neural language model. The bidirectional language model uses the future as well as the past information in order to predict the next output in the sequence. The proposed method based on bi-directional beam search takes advantage of the CTC greedy decoding output to represent the noisy future information. Experiments on the Librispeech dataset demonstrate the superiority of our proposed method compared to baselines using unidirectional decoding. In particular, the boost in accuracy is most apparent at the start of a sequence, which is the most erroneous part for existing systems based on unidirectional decoding. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2110.03326v1-abstract-full').style.display = 'none'; document.getElementById('2110.03326v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 7 October, 2021; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2021.
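<p>The method above starts from the CTC greedy decoding output. As background, here is a minimal sketch of that greedy collapse step (take the argmax frame by frame, merge repeats, drop blanks); the vocabulary, blank index, and toy posteriors are invented for the example, not taken from the paper.</p> <pre><code class="language-python">
import numpy as np

def ctc_greedy_decode(log_probs, blank=0):
    """Collapse the frame-wise argmax path of a CTC model into a label sequence."""
    best_path = log_probs.argmax(axis=-1)      # (T,) frame-wise best tokens
    decoded, prev = [], blank
    for tok in best_path:
        if tok != prev and tok != blank:       # merge repeats and drop blank frames
            decoded.append(int(tok))
        prev = tok
    return decoded

# toy posteriors over {blank, 'a', 'b', 'c'} for 6 frames
vocab = {1: "a", 2: "b", 3: "c"}
logits = np.log(np.array([
    [0.10, 0.80, 0.05, 0.05],   # a
    [0.10, 0.80, 0.05, 0.05],   # a (repeat, merged)
    [0.90, 0.05, 0.03, 0.02],   # blank
    [0.10, 0.05, 0.80, 0.05],   # b
    [0.10, 0.05, 0.80, 0.05],   # b (repeat, merged)
    [0.10, 0.05, 0.05, 0.80],   # c
]))
print("".join(vocab[t] for t in ctc_greedy_decode(logits)))  # prints "abc"
</code></pre>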
</p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">submitted to ICASSP 2022</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2110.02791">arXiv:2110.02791</a> <span> [<a href="https://arxiv.org/pdf/2110.02791">pdf</a>, <a href="https://arxiv.org/format/2110.02791">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Sound">cs.SD</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Audio and Speech Processing">eess.AS</span> </div> </div> <p class="title is-5 mathjax"> Spell my name: keyword boosted speech recognition </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&query=Jung%2C+N">Namkyu Jung</a>, <a href="/search/eess?searchtype=author&query=Kim%2C+G">Geonmin Kim</a>, <a href="/search/eess?searchtype=author&query=Chung%2C+J+S">Joon Son Chung</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2110.02791v1-abstract-short" style="display: inline;"> Recognition of uncommon words such as names and technical terminology is important to understanding conversations in context. However, the ability to recognise such words remains a challenge in modern automatic speech recognition (ASR) systems. In this paper, we propose a simple but powerful ASR decoding method that can better recognise these uncommon keywords, which in turn enables better reada… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2110.02791v1-abstract-full').style.display = 'inline'; document.getElementById('2110.02791v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2110.02791v1-abstract-full" style="display: none;"> Recognition of uncommon words such as names and technical terminology is important to understanding conversations in context. However, the ability to recognise such words remains a challenge in modern automatic speech recognition (ASR) systems. In this paper, we propose a simple but powerful ASR decoding method that can better recognise these uncommon keywords, which in turn enables better readability of the results. The method boosts the probabilities of given keywords in a beam search based on acoustic model predictions. The method does not require any training in advance. We demonstrate the effectiveness of our method on the LibriSpeech test sets and also on internal data of real-world conversations. Our method significantly boosts keyword accuracy on the test sets, while maintaining the accuracy of the other words, as well as providing significant qualitative improvements. This method is applicable to other tasks such as machine translation, or wherever unseen and difficult keywords need to be recognised in beam search.
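<p>To make the boosting idea above concrete, here is a toy sketch in which hypotheses that contain a listed keyword receive an additive log-probability bonus during beam search. The keyword list, bonus weight, and candidate scores are invented for illustration and are not the paper's settings.</p> <pre><code class="language-python">
import math

KEYWORDS = {"seoul", "kimchi"}   # names/terms we want the decoder to favour
BONUS = 2.0                      # additive log-prob reward per keyword in a hypothesis

def boosted_score(base_logprob, hypothesis):
    hits = sum(1 for w in hypothesis.split() if w in KEYWORDS)
    return base_logprob + BONUS * hits

def beam_step(beams, candidates, beam_size=3):
    """Extend each hypothesis with each candidate word and keep the best beams."""
    expanded = []
    for text, base in beams:
        for word, logp in candidates:
            new_text = (text + " " + word).strip()
            new_base = base + logp
            expanded.append((new_text, new_base, boosted_score(new_base, new_text)))
    expanded.sort(key=lambda b: b[2], reverse=True)
    return [(t, b) for t, b, _ in expanded[:beam_size]]

beams = [("", 0.0)]
# acoustic-model candidates per step as (word, log-probability), invented numbers
steps = [
    [("soul", math.log(0.5)), ("seoul", math.log(0.3)), ("sole", math.log(0.2))],
    [("food", math.log(0.6)), ("mood", math.log(0.4))],
]
for cands in steps:
    beams = beam_step(beams, cands)
print(beams[0][0])   # the boosted beam prefers "seoul food" over "soul food"
</code></pre>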
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2110.02791v1-abstract-full').style.display = 'none'; document.getElementById('2110.02791v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 6 October, 2021; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2021. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2109.09041">arXiv:2109.09041</a> <span> [<a href="https://arxiv.org/pdf/2109.09041">pdf</a>, <a href="https://arxiv.org/format/2109.09041">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Robotics">cs.RO</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Systems and Control">eess.SY</span> </div> </div> <p class="title is-5 mathjax"> Online Distributed Trajectory Planning for Quadrotor Swarm with Feasibility Guarantee using Linear Safe Corridor </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&query=Park%2C+J">Jungwon Park</a>, <a href="/search/eess?searchtype=author&query=Kim%2C+D">Dabin Kim</a>, <a href="/search/eess?searchtype=author&query=Kim%2C+G+C">Gyeong Chan Kim</a>, <a href="/search/eess?searchtype=author&query=Oh%2C+D">Dahyun Oh</a>, <a href="/search/eess?searchtype=author&query=Kim%2C+H+J">H. Jin Kim</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2109.09041v2-abstract-short" style="display: inline;"> This paper presents a new online multi-agent trajectory planning algorithm that guarantees to generate safe, dynamically feasible trajectories in a cluttered environment. The proposed algorithm utilizes a linear safe corridor (LSC) to formulate the distributed trajectory optimization problem with only feasible constraints, so it does not resort to slack variables or soft constraints to avoid optim… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2109.09041v2-abstract-full').style.display = 'inline'; document.getElementById('2109.09041v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2109.09041v2-abstract-full" style="display: none;"> This paper presents a new online multi-agent trajectory planning algorithm that guarantees to generate safe, dynamically feasible trajectories in a cluttered environment. The proposed algorithm utilizes a linear safe corridor (LSC) to formulate the distributed trajectory optimization problem with only feasible constraints, so it does not resort to slack variables or soft constraints to avoid optimization failure. We adopt a priority-based goal planning method to prevent the deadlock without an additional procedure to decide which robot to yield. The proposed algorithm can compute the trajectories for 60 agents on average 15.5 ms per agent with an Intel i7 laptop and shows a similar flight distance and distance compared to the baselines based on soft constraints. 
We verified that the proposed method can reach the goal without deadlock in both the random forest and the indoor space, and we validated the safety and operability of the proposed algorithm through a real flight test with ten quadrotors in a maze-like environment. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2109.09041v2-abstract-full').style.display = 'none'; document.getElementById('2109.09041v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 3 January, 2022; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 18 September, 2021; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> September 2021. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">8 pages, RA-L 2022 under review</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2109.07120">arXiv:2109.07120</a> <span> [<a href="https://arxiv.org/pdf/2109.07120">pdf</a>, <a href="https://arxiv.org/format/2109.07120">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Robotics">cs.RO</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Systems and Control">eess.SY</span> </div> <div class="is-inline-block" style="margin-left: 0.5rem"> <div class="tags has-addons"> <span class="tag is-dark is-size-7">doi</span> <span class="tag is-light is-size-7"><a class="" href="https://doi.org/10.1109/LRA.2022.3191234">10.1109/LRA.2022.3191234 <i class="fa fa-external-link" aria-hidden="true"></i></a></span> </div> </div> </div> <p class="title is-5 mathjax"> Infusing model predictive control into meta-reinforcement learning for mobile robots in dynamic environments </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&query=Shin%2C+J">Jaeuk Shin</a>, <a href="/search/eess?searchtype=author&query=Hakobyan%2C+A">Astghik Hakobyan</a>, <a href="/search/eess?searchtype=author&query=Park%2C+M">Mingyu Park</a>, <a href="/search/eess?searchtype=author&query=Kim%2C+Y">Yeoneung Kim</a>, <a href="/search/eess?searchtype=author&query=Kim%2C+G">Gihun Kim</a>, <a href="/search/eess?searchtype=author&query=Yang%2C+I">Insoon Yang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2109.07120v3-abstract-short" style="display: inline;"> The successful operation of mobile robots requires them to adapt rapidly to environmental changes. To develop an adaptive decision-making tool for mobile robots, we propose a novel algorithm that combines meta-reinforcement learning (meta-RL) with model predictive control (MPC). 
Our method employs an off-policy meta-RL algorithm as a baseline to train a policy using transition samples generated by… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2109.07120v3-abstract-full').style.display = 'inline'; document.getElementById('2109.07120v3-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2109.07120v3-abstract-full" style="display: none;"> The successful operation of mobile robots requires them to adapt rapidly to environmental changes. To develop an adaptive decision-making tool for mobile robots, we propose a novel algorithm that combines meta-reinforcement learning (meta-RL) with model predictive control (MPC). Our method employs an off-policy meta-RL algorithm as a baseline to train a policy using transition samples generated by MPC when the robot detects certain events that can be effectively handled by MPC, with its explicit use of robot dynamics. The key idea of our method is to switch between the meta-learned policy and the MPC controller in a randomized and event-triggered fashion to make up for suboptimal MPC actions caused by the limited prediction horizon. During meta-testing, the MPC module is deactivated to significantly reduce computation time in motion control. We further propose an online adaptation scheme that enables the robot to infer and adapt to a new task within a single trajectory. The performance of our method has been demonstrated through simulations using a nonlinear car-like vehicle model with (i) synthetic movements of obstacles, and (ii) real-world pedestrian motion data. The simulation results indicate that our method outperforms other algorithms in terms of learning efficiency and navigation quality. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2109.07120v3-abstract-full').style.display = 'none'; document.getElementById('2109.07120v3-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 7 July, 2022; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 15 September, 2021; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> September 2021. 
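<p>As a rough illustration of the randomized, event-triggered switching described above, the following toy loop alternates between a stand-in learned policy and a one-step MPC-style lookahead on a trivial 1-D model. The dynamics, action set, cost, and switching probability are all invented for the example and are not the paper's formulation.</p> <pre><code class="language-python">
import random

GOAL = 5.0
ACTIONS = [-1.0, 0.0, 1.0]

def dynamics(x, a):
    # trivial single-integrator stand-in for the robot model used by the MPC module
    return x + a

def mpc_action(x):
    """One-step lookahead: pick the action minimising the distance-to-goal cost."""
    return min(ACTIONS, key=lambda a: abs(dynamics(x, a) - GOAL))

def learned_policy(x):
    # stand-in for the meta-learned policy queried in normal operation
    return random.choice(ACTIONS)

random.seed(0)
x = 0.0
for t in range(8):
    # randomized, event-triggered switching is reduced here to a weighted coin flip
    use_mpc = random.choices([True, False], weights=[0.3, 0.7])[0]
    a = mpc_action(x) if use_mpc else learned_policy(x)
    x = dynamics(x, a)
    print(t, "mpc" if use_mpc else "policy", round(x, 1))
</code></pre>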
</p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Accepted for publication in the IEEE Robotics and Automation Letters</span> </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Journal ref:</span> IEEE Robotics and Automation Letters, 2022 </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2104.07235">arXiv:2104.07235</a> <span> [<a href="https://arxiv.org/pdf/2104.07235">pdf</a>, <a href="https://arxiv.org/format/2104.07235">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Image and Video Processing">eess.IV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> Vision Transformer using Low-level Chest X-ray Feature Corpus for COVID-19 Diagnosis and Severity Quantification </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&query=Park%2C+S">Sangjoon Park</a>, <a href="/search/eess?searchtype=author&query=Kim%2C+G">Gwanghyun Kim</a>, <a href="/search/eess?searchtype=author&query=Oh%2C+Y">Yujin Oh</a>, <a href="/search/eess?searchtype=author&query=Seo%2C+J+B">Joon Beom Seo</a>, <a href="/search/eess?searchtype=author&query=Lee%2C+S+M">Sang Min Lee</a>, <a href="/search/eess?searchtype=author&query=Kim%2C+J+H">Jin Hwan Kim</a>, <a href="/search/eess?searchtype=author&query=Moon%2C+S">Sungjun Moon</a>, <a href="/search/eess?searchtype=author&query=Lim%2C+J">Jae-Kwang Lim</a>, <a href="/search/eess?searchtype=author&query=Ye%2C+J+C">Jong Chul Ye</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2104.07235v1-abstract-short" style="display: inline;"> Developing a robust algorithm to diagnose and quantify the severity of COVID-19 using Chest X-ray (CXR) requires a large number of well-curated COVID-19 datasets, which is difficult to collect under the global COVID-19 pandemic. On the other hand, CXR data with other findings are abundant. This situation is ideally suited for the Vision Transformer (ViT) architecture, where a lot of unlabeled data… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2104.07235v1-abstract-full').style.display = 'inline'; document.getElementById('2104.07235v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2104.07235v1-abstract-full" style="display: none;"> Developing a robust algorithm to diagnose and quantify the severity of COVID-19 using Chest X-ray (CXR) requires a large number of well-curated COVID-19 datasets, which is difficult to collect under the global COVID-19 pandemic. On the other hand, CXR data with other findings are abundant. This situation is ideally suited for the Vision Transformer (ViT) architecture, where a lot of unlabeled data can be used through structural modeling by the self-attention mechanism. 
However, the use of existing ViT is not optimal, since feature embedding through direct patch flattening or ResNet backbone in the standard ViT is not intended for CXR. To address this problem, here we propose a novel Vision Transformer that utilizes low-level CXR feature corpus obtained from a backbone network that extracts common CXR findings. Specifically, the backbone network is first trained with large public datasets to detect common abnormal findings such as consolidation, opacity, edema, etc. Then, the embedded features from the backbone network are used as corpora for a Transformer model for the diagnosis and the severity quantification of COVID-19. We evaluate our model on various external test datasets from totally different institutions to evaluate the generalization capability. The experimental results confirm that our model can achieve the state-of-the-art performance in both diagnosis and severity quantification tasks with superior generalization capability, which are sine qua non of widespread deployment. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2104.07235v1-abstract-full').style.display = 'none'; document.getElementById('2104.07235v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 15 April, 2021; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> April 2021. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">13 pages</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2104.06782">arXiv:2104.06782</a> <span> [<a href="https://arxiv.org/pdf/2104.06782">pdf</a>, <a href="https://arxiv.org/format/2104.06782">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Image and Video Processing">eess.IV</span> </div> </div> <p class="title is-5 mathjax"> Visual Comfort Aware-Reinforcement Learning for Depth Adjustment of Stereoscopic 3D Images </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&query=Kim%2C+H+G">Hak Gu Kim</a>, <a href="/search/eess?searchtype=author&query=Park%2C+M">Minho Park</a>, <a href="/search/eess?searchtype=author&query=Lee%2C+S">Sangmin Lee</a>, <a href="/search/eess?searchtype=author&query=Kim%2C+S">Seongyeop Kim</a>, <a href="/search/eess?searchtype=author&query=Ro%2C+Y+M">Yong Man Ro</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2104.06782v1-abstract-short" style="display: inline;"> Depth adjustment aims to enhance the visual experience of stereoscopic 3D (S3D) images, which accompanied with improving visual comfort and depth perception. For a human expert, the depth adjustment procedure is a sequence of iterative decision making. The human expert iteratively adjusts the depth until he is satisfied with the both levels of visual comfort and the perceived depth. 
In this work,… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2104.06782v1-abstract-full').style.display = 'inline'; document.getElementById('2104.06782v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2104.06782v1-abstract-full" style="display: none;"> Depth adjustment aims to enhance the visual experience of stereoscopic 3D (S3D) images, which is accompanied by improved visual comfort and depth perception. For a human expert, the depth adjustment procedure is a sequence of iterative decision making. The human expert iteratively adjusts the depth until he is satisfied with both the level of visual comfort and the perceived depth. In this work, we present a novel deep reinforcement learning (DRL)-based approach for depth adjustment named VCA-RL (Visual Comfort Aware Reinforcement Learning) to explicitly model human sequential decision making in depth editing operations. We formulate the depth adjustment process as a Markov decision process where actions are defined as camera movement operations to control the distance between the left and right cameras. Our agent is trained based on the guidance of an objective visual comfort assessment metric to learn the optimal sequence of camera movement actions in terms of perceptual aspects in stereoscopic viewing. With extensive experiments and user studies, we show the effectiveness of our VCA-RL model on three different S3D databases. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2104.06782v1-abstract-full').style.display = 'none'; document.getElementById('2104.06782v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 14 April, 2021; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> April 2021.
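<p>To illustrate the kind of decision process described above, the toy sketch below treats the inter-camera distance as the state, small baseline moves as the actions, and a stand-in comfort function in place of the objective visual comfort metric; a greedy rule stands in for the trained agent, and every number is invented for the example.</p> <pre><code class="language-python">
ACTIONS = {"closer": -0.5, "stay": 0.0, "farther": 0.5}   # camera-baseline moves (cm)
TARGET_BASELINE = 6.0                                     # most comfortable baseline (toy)

def comfort_score(baseline):
    # stand-in for an objective visual comfort assessment metric
    return -abs(baseline - TARGET_BASELINE)

def greedy_adjustment(baseline, steps=6):
    """Greedy stand-in for the learned agent: pick the action with the best comfort."""
    trajectory = [baseline]
    for _ in range(steps):
        name = max(ACTIONS, key=lambda k: comfort_score(baseline + ACTIONS[k]))
        baseline = baseline + ACTIONS[name]
        trajectory.append(round(baseline, 2))
    return trajectory

print(greedy_adjustment(8.5))   # the baseline walks toward the comfortable setting
</code></pre>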
</p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">AAAI 2021</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2104.06780">arXiv:2104.06780</a> <span> [<a href="https://arxiv.org/pdf/2104.06780">pdf</a>, <a href="https://arxiv.org/format/2104.06780">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Image and Video Processing">eess.IV</span> </div> </div> <p class="title is-5 mathjax"> Towards a Better Understanding of VR Sickness: Physical Symptom Prediction for VR Contents </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&query=Kim%2C+H+G">Hak Gu Kim</a>, <a href="/search/eess?searchtype=author&query=Lee%2C+S">Sangmin Lee</a>, <a href="/search/eess?searchtype=author&query=Kim%2C+S">Seongyeop Kim</a>, <a href="/search/eess?searchtype=author&query=Lim%2C+H">Heoun-taek Lim</a>, <a href="/search/eess?searchtype=author&query=Ro%2C+Y+M">Yong Man Ro</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2104.06780v1-abstract-short" style="display: inline;"> We address the black-box issue of VR sickness assessment (VRSA) by evaluating the level of physical symptoms of VR sickness. For the VR contents inducing the similar VR sickness level, the physical symptoms can vary depending on the characteristics of the contents. Most of existing VRSA methods focused on assessing the overall VR sickness score. To make better understanding of VR sickness, it is r… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2104.06780v1-abstract-full').style.display = 'inline'; document.getElementById('2104.06780v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2104.06780v1-abstract-full" style="display: none;"> We address the black-box issue of VR sickness assessment (VRSA) by evaluating the level of physical symptoms of VR sickness. For the VR contents inducing the similar VR sickness level, the physical symptoms can vary depending on the characteristics of the contents. Most of existing VRSA methods focused on assessing the overall VR sickness score. To make better understanding of VR sickness, it is required to predict and provide the level of major symptoms of VR sickness rather than overall degree of VR sickness. In this paper, we predict the degrees of main physical symptoms affecting the overall degree of VR sickness, which are disorientation, nausea, and oculomotor. In addition, we introduce a new large-scale dataset for VRSA including 360 videos with various frame rates, physiological signals, and subjective scores. On VRSA benchmark and our newly collected dataset, our approach shows a potential to not only achieve the highest correlation with subjective scores, but also to better understand which symptoms are the main causes of VR sickness. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2104.06780v1-abstract-full').style.display = 'none'; document.getElementById('2104.06780v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 14 April, 2021; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> April 2021. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">AAAI 2021</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2103.09022">arXiv:2103.09022</a> <span> [<a href="https://arxiv.org/pdf/2103.09022">pdf</a>, <a href="https://arxiv.org/format/2103.09022">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Image and Video Processing">eess.IV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> Missing Cone Artifacts Removal in ODT using Unsupervised Deep Learning in Projection Domain </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&query=Chung%2C+H">Hyungjin Chung</a>, <a href="/search/eess?searchtype=author&query=Huh%2C+J">Jaeyoung Huh</a>, <a href="/search/eess?searchtype=author&query=Kim%2C+G">Geon Kim</a>, <a href="/search/eess?searchtype=author&query=Park%2C+Y+K">Yong Keun Park</a>, <a href="/search/eess?searchtype=author&query=Ye%2C+J+C">Jong Chul Ye</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2103.09022v2-abstract-short" style="display: inline;"> Optical diffraction tomography (ODT) produces three dimensional distribution of refractive index (RI) by measuring scattering fields at various angles. Although the distribution of RI index is highly informative, due to the missing cone problem stemming from the limited-angle acquisition of holograms, reconstructions have very poor resolution along axial direction compared to the horizontal imagin… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2103.09022v2-abstract-full').style.display = 'inline'; document.getElementById('2103.09022v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2103.09022v2-abstract-full" style="display: none;"> Optical diffraction tomography (ODT) produces three dimensional distribution of refractive index (RI) by measuring scattering fields at various angles. Although the distribution of RI index is highly informative, due to the missing cone problem stemming from the limited-angle acquisition of holograms, reconstructions have very poor resolution along axial direction compared to the horizontal imaging plane. To solve this issue, here we present a novel unsupervised deep learning framework, which learns the probability distribution of missing projection views through optimal transport driven cycleGAN. 
Experimental results show that missing cone artifact in ODT can be significantly resolved by the proposed method. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2103.09022v2-abstract-full').style.display = 'none'; document.getElementById('2103.09022v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 18 July, 2021; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 16 March, 2021; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> March 2021. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">This will appear in IEEE Trans. on Computational Imaging</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2103.07062">arXiv:2103.07062</a> <span> [<a href="https://arxiv.org/pdf/2103.07062">pdf</a>, <a href="https://arxiv.org/format/2103.07062">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Image and Video Processing">eess.IV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> Severity Quantification and Lesion Localization of COVID-19 on CXR using Vision Transformer </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&query=Kim%2C+G">Gwanghyun Kim</a>, <a href="/search/eess?searchtype=author&query=Park%2C+S">Sangjoon Park</a>, <a href="/search/eess?searchtype=author&query=Oh%2C+Y">Yujin Oh</a>, <a href="/search/eess?searchtype=author&query=Seo%2C+J+B">Joon Beom Seo</a>, <a href="/search/eess?searchtype=author&query=Lee%2C+S+M">Sang Min Lee</a>, <a href="/search/eess?searchtype=author&query=Kim%2C+J+H">Jin Hwan Kim</a>, <a href="/search/eess?searchtype=author&query=Moon%2C+S">Sungjun Moon</a>, <a href="/search/eess?searchtype=author&query=Lim%2C+J">Jae-Kwang Lim</a>, <a href="/search/eess?searchtype=author&query=Ye%2C+J+C">Jong Chul Ye</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2103.07062v1-abstract-short" style="display: inline;"> Under the global pandemic of COVID-19, building an automated framework that quantifies the severity of COVID-19 and localizes the relevant lesion on chest X-ray images has become increasingly important. Although pixel-level lesion severity labels, e.g. 
lesion segmentation, can be the most excellent target to build a robust model, collecting enough data with such labels is difficult due to time and… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2103.07062v1-abstract-full').style.display = 'inline'; document.getElementById('2103.07062v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2103.07062v1-abstract-full" style="display: none;"> Under the global pandemic of COVID-19, building an automated framework that quantifies the severity of COVID-19 and localizes the relevant lesion on chest X-ray images has become increasingly important. Although pixel-level lesion severity labels, e.g. lesion segmentation, can be the most excellent target to build a robust model, collecting enough data with such labels is difficult due to time and labor-intensive annotation tasks. Instead, array-based severity labeling that assigns integer scores on six subdivisions of lungs can be an alternative choice enabling the quick labeling. Several groups proposed deep learning algorithms that quantify the severity of COVID-19 using the array-based COVID-19 labels and localize the lesions with explainability maps. To further improve the accuracy and interpretability, here we propose a novel Vision Transformer tailored for both quantification of the severity and clinically applicable localization of the COVID-19 related lesions. Our model is trained in a weakly-supervised manner to generate the full probability maps from weak array-based labels. Furthermore, a novel progressive self-training method enables us to build a model with a small labeled dataset. The quantitative and qualitative analysis on the external testset demonstrates that our method shows comparable performance with radiologists for both tasks with stability in a real-world application. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2103.07062v1-abstract-full').style.display = 'none'; document.getElementById('2103.07062v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 11 March, 2021; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> March 2021. 
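<p>As a rough illustration of the array-based severity labels mentioned above, the sketch below pools a pixel-level probability map into six per-zone integer scores (three rows by two sides) and compares them with a label array. The zone layout, pooling rule, and score scaling are assumptions for the example rather than the paper's scheme.</p> <pre><code class="language-python">
import numpy as np

def zone_scores(prob_map, max_score=3):
    """Pool a lesion-probability map into 6 lung-zone severity scores (rows by sides)."""
    h, w = prob_map.shape
    rows = np.array_split(np.arange(h), 3)     # upper / middle / lower zones
    cols = np.array_split(np.arange(w), 2)     # right / left lungs
    scores = np.zeros((3, 2))
    for i, r in enumerate(rows):
        for j, c in enumerate(cols):
            zone = prob_map[np.ix_(r, c)]
            scores[i, j] = np.rint(zone.mean() * max_score)
    return scores.astype(int)

rng = np.random.default_rng(1)
prob_map = rng.random((96, 96))                # stand-in output of the network
label = np.array([[1, 0], [2, 1], [3, 2]])     # toy radiologist array-based label

pred = zone_scores(prob_map)
print(pred)
print("mean absolute error:", np.abs(pred - label).mean())
</code></pre>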
</p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">8 pages</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2103.07055">arXiv:2103.07055</a> <span> [<a href="https://arxiv.org/pdf/2103.07055">pdf</a>, <a href="https://arxiv.org/format/2103.07055">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Image and Video Processing">eess.IV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> Vision Transformer for COVID-19 CXR Diagnosis using Chest X-ray Feature Corpus </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&query=Park%2C+S">Sangjoon Park</a>, <a href="/search/eess?searchtype=author&query=Kim%2C+G">Gwanghyun Kim</a>, <a href="/search/eess?searchtype=author&query=Oh%2C+Y">Yujin Oh</a>, <a href="/search/eess?searchtype=author&query=Seo%2C+J+B">Joon Beom Seo</a>, <a href="/search/eess?searchtype=author&query=Lee%2C+S+M">Sang Min Lee</a>, <a href="/search/eess?searchtype=author&query=Kim%2C+J+H">Jin Hwan Kim</a>, <a href="/search/eess?searchtype=author&query=Moon%2C+S">Sungjun Moon</a>, <a href="/search/eess?searchtype=author&query=Lim%2C+J">Jae-Kwang Lim</a>, <a href="/search/eess?searchtype=author&query=Ye%2C+J+C">Jong Chul Ye</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2103.07055v1-abstract-short" style="display: inline;"> Under the global COVID-19 crisis, developing a robust diagnosis algorithm for COVID-19 using CXR is hampered by the lack of a well-curated COVID-19 data set, although CXR data with other diseases are abundant. This situation is suitable for the vision transformer architecture that can exploit the abundant unlabeled data using pre-training. However, the direct use of an existing vision transformer that use… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2103.07055v1-abstract-full').style.display = 'inline'; document.getElementById('2103.07055v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2103.07055v1-abstract-full" style="display: none;"> Under the global COVID-19 crisis, developing a robust diagnosis algorithm for COVID-19 using CXR is hampered by the lack of a well-curated COVID-19 data set, although CXR data with other diseases are abundant. This situation is suitable for the vision transformer architecture that can exploit the abundant unlabeled data using pre-training. However, the direct use of an existing vision transformer that uses the corpus generated by the ResNet is not optimal for correct feature embedding. To mitigate this problem, we propose a novel vision Transformer by using the low-level CXR feature corpus that is obtained to extract the abnormal CXR features. Specifically, the backbone network is trained using large public datasets to obtain the abnormal features in routine diagnosis such as consolidation, ground-glass opacity (GGO), etc.
Then, the embedded features from the backbone network are used as corpus for vision transformer training. We examine our model on various external test datasets acquired from totally different institutions to assess the generalization ability. Our experiments demonstrate that our method achieved the state-of-art performance and has better generalization capability, which are crucial for a widespread deployment. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2103.07055v1-abstract-full').style.display = 'none'; document.getElementById('2103.07055v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 11 March, 2021; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> March 2021. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">10 pages</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2102.08567">arXiv:2102.08567</a> <span> [<a href="https://arxiv.org/pdf/2102.08567">pdf</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Image and Video Processing">eess.IV</span> </div> </div> <p class="title is-5 mathjax"> Ensemble Transfer Learning of Elastography and B-mode Breast Ultrasound Images </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&query=Misra%2C+S">Sampa Misra</a>, <a href="/search/eess?searchtype=author&query=Jeon%2C+S">Seungwan Jeon</a>, <a href="/search/eess?searchtype=author&query=Managuli%2C+R">Ravi Managuli</a>, <a href="/search/eess?searchtype=author&query=Lee%2C+S">Seiyon Lee</a>, <a href="/search/eess?searchtype=author&query=Kim%2C+G">Gyuwon Kim</a>, <a href="/search/eess?searchtype=author&query=Lee%2C+S">Seungchul Lee</a>, <a href="/search/eess?searchtype=author&query=Barr%2C+R+G">Richard G Barr</a>, <a href="/search/eess?searchtype=author&query=Kim%2C+C">Chulhong Kim</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2102.08567v1-abstract-short" style="display: inline;"> Computer-aided detection (CAD) of benign and malignant breast lesions becomes increasingly essential in breast ultrasound (US) imaging. The CAD systems rely on imaging features identified by the medical experts for their performance, whereas deep learning (DL) methods automatically extract features from the data. 
The challenge of the DL is the insufficiency of breast US images available to train t… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2102.08567v1-abstract-full').style.display = 'inline'; document.getElementById('2102.08567v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2102.08567v1-abstract-full" style="display: none;"> Computer-aided detection (CAD) of benign and malignant breast lesions becomes increasingly essential in breast ultrasound (US) imaging. The CAD systems rely on imaging features identified by the medical experts for their performance, whereas deep learning (DL) methods automatically extract features from the data. The challenge of the DL is the insufficiency of breast US images available to train the DL models. Here, we present an ensemble transfer learning model to classify benign and malignant breast tumors using B-mode breast US (B-US) and strain elastography breast US (SE-US) images. This model combines semantic features from AlexNet & ResNet models to classify benign from malignant tumors. We use both B-US and SE-US images to train the model and classify the tumors. We retrospectively gathered 85 patients' data, with 42 benign and 43 malignant cases confirmed with the biopsy. Each patient had multiple B-US and their corresponding SE-US images, and the total dataset contained 261 B-US images and 261 SE-US images. Experimental results show that our ensemble model achieves a sensitivity of 88.89% and specificity of 91.10%. These diagnostic performances of the proposed method are equivalent to or better than manual identification. Thus, our proposed ensemble learning method would facilitate detecting early breast cancer, reliably improving patient care. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2102.08567v1-abstract-full').style.display = 'none'; document.getElementById('2102.08567v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 16 February, 2021; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2021. 
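<p>A minimal PyTorch sketch of the feature-level fusion described above: features from an AlexNet branch and a ResNet branch are concatenated and passed to a small benign-versus-malignant classifier. The choice of ResNet-18, the assignment of image types to branches, the concatenation fusion, and the random toy inputs are assumptions for illustration, not the paper's exact configuration.</p> <pre><code class="language-python">
import torch
import torch.nn as nn
from torchvision import models

# backbones with their final classification layers removed (no pretrained download)
alexnet = models.alexnet()
alexnet.classifier[6] = nn.Identity()     # 4096-dim features
resnet = models.resnet18()
resnet.fc = nn.Identity()                 # 512-dim features

fusion_head = nn.Sequential(nn.Linear(4096 + 512, 128), nn.ReLU(), nn.Linear(128, 2))

def classify(b_us, se_us):
    """Concatenate features from the two image types and score benign vs. malignant."""
    f1 = alexnet(b_us)
    f2 = resnet(se_us)
    return fusion_head(torch.cat([f1, f2], dim=1))

b_us = torch.randn(4, 3, 224, 224)        # toy B-mode ultrasound batch
se_us = torch.randn(4, 3, 224, 224)       # toy strain-elastography batch
with torch.no_grad():
    logits = classify(b_us, se_us)
print(logits.shape)                       # torch.Size([4, 2])
</code></pre>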
arXiv:2010.13105 (https://arxiv.org/abs/2010.13105) [pdf, other] — cs.CL; cs.LG; cs.SD; eess.AS
Title: Two-stage Textual Knowledge Distillation for End-to-End Spoken Language Understanding
Authors: Seongbin Kim, Gyuwan Kim, Seongjin Shin, Sangmin Lee
Abstract: End-to-end approaches open a new way toward more accurate and efficient spoken language understanding (SLU) systems by alleviating the drawbacks of traditional pipeline systems. Previous works exploit textual information for an SLU model via pre-training with automatic speech recognition or fine-tuning with knowledge distillation. To utilize textual information more effectively, this work proposes a two-stage textual knowledge distillation method that matches utterance-level representations and predicted logits of the two modalities during pre-training and fine-tuning, sequentially. We use vq-wav2vec BERT as the speech encoder because it captures general and rich features. Furthermore, we improve performance, especially in the low-resource scenario, with data augmentation methods that randomly mask spans of discrete audio tokens and contextualized hidden representations. Consequently, we push the state of the art on Fluent Speech Commands, achieving 99.7% test accuracy in the full-dataset setting and 99.5% in the 10% subset setting. Ablation studies empirically verify that all of the methods used are crucial to the final performance, providing a best practice for spoken language understanding. Code is available at https://github.com/clovaai/textual-kd-slu.
Submitted 10 June, 2021; v1 submitted 25 October, 2020; originally announced October 2020.
Comments: ICASSP 2021; 5 pages, 1 figure
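The two losses named in the abstract (matching utterance-level representations during pre-training and predicted logits during fine-tuning) map naturally onto standard distillation objectives. The following is a minimal sketch under that assumption; the temperature, dimensions, and loss weighting are illustrative, and the released code at the linked repository is authoritative.

```python
# Minimal sketch (assumptions, not the released code): stage 1 matches
# utterance-level representations with an MSE loss; stage 2 matches predicted
# logits with a temperature-scaled KL divergence, as in standard distillation.
import torch
import torch.nn.functional as F

def stage1_representation_loss(speech_repr, text_repr):
    """Match pooled utterance-level embeddings of the two modalities."""
    return F.mse_loss(speech_repr, text_repr)

def stage2_logit_loss(student_logits, teacher_logits, temperature=2.0):
    """Soft-label distillation on the intent classifier outputs."""
    t = temperature
    return F.kl_div(
        F.log_softmax(student_logits / t, dim=-1),
        F.softmax(teacher_logits / t, dim=-1),
        reduction="batchmean",
    ) * (t * t)

# toy usage with hypothetical dimensions
speech, text = torch.randn(4, 768), torch.randn(4, 768)
s_logits, t_logits = torch.randn(4, 31), torch.randn(4, 31)
loss = stage1_representation_loss(speech, text) + stage2_logit_loss(s_logits, t_logits)
```

The temperature scaling softens both distributions so the student also learns from the teacher's relative confidences rather than only its top prediction.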
arXiv:2010.01721 (https://arxiv.org/abs/2010.01721) [pdf] — eess.IV
Title: Motion Correction of 3D Dynamic Contrast-Enhanced Ultrasound Imaging without Anatomical Bmode Images
Authors: Jia-Shu Chen, Maged Goubran, Gaeun Kim, Jurgen K. Willmann, Michael Zeineh, Dimitre Hristov, Ahmed El Kaffas
Abstract: In conventional 2D DCE-US, motion correction algorithms take advantage of accompanying side-by-side anatomical Bmode images that contain time-stable features. However, current commercial models of 3D DCE-US do not provide side-by-side Bmode images, which makes motion correction challenging.
This work introduces a novel motion correction (MC) algorithm for 3D DCE-US and assesses its efficacy on clinical data sets. In brief, the algorithm uses a pyramidal approach whereby short temporal windows of 3-6 consecutive frames are created to perform local registrations, which are then registered to a master reference derived from a weighted average of all frames. We evaluated the algorithm in 8 patients with metastatic lesions in the liver using the Philips X6-1 matrix transducer at a frame rate of 1-3 Hz. We assessed improvements between original and motion-corrected 3D DCE-US cine loops using: i) frame-to-frame volumetric overlap of segmented lesions, ii) the normalized correlation coefficient (NCC) between frames (similarity analysis), and iii) the sum of squared errors (SSE), root-mean-squared error (RMSE), and r-squared (R2) quality of fit of time-intensity curves (TIC) extracted from a segmented lesion. Overall, the results demonstrate a significant decrease in 3D DCE-US motion after applying the proposed algorithm. Frame-to-frame lesion overlap improved significantly across all patients, from 68% without correction to 83% with motion correction (p = 0.023). Frame-to-frame similarity as assessed by NCC also improved significantly on two different sets of time points, from 0.694 (original cine) to 0.862 (corresponding MC cine) and from 0.723 to 0.886. TIC analysis showed a significant decrease in RMSE (p = 0.018) and a significant increase in R2 goodness of fit (p = 0.029) for the patient cohort.
Submitted 4 October, 2020; originally announced October 2020.
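Of the evaluation metrics listed above, the normalized correlation coefficient (NCC) between frames is straightforward to state in code. The sketch below is a plain NumPy illustration of that metric only; it does not reproduce the registration pipeline itself.

```python
# Minimal sketch of the frame-to-frame similarity metric named above:
# normalized correlation coefficient (NCC) between two 3D volumes.
import numpy as np

def ncc(vol_a: np.ndarray, vol_b: np.ndarray) -> float:
    """Normalized correlation coefficient between two same-shaped volumes."""
    a = vol_a.astype(np.float64).ravel()
    b = vol_b.astype(np.float64).ravel()
    a -= a.mean()
    b -= b.mean()
    denom = np.sqrt((a * a).sum() * (b * b).sum())
    return float((a * b).sum() / denom) if denom > 0 else 0.0

# toy usage: similarity of consecutive frames in a (T, Z, Y, X) cine
cine = np.random.rand(6, 32, 64, 64)
scores = [ncc(cine[t], cine[t + 1]) for t in range(cine.shape[0] - 1)]
```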
arXiv:2009.13777 (https://arxiv.org/abs/2009.13777) [pdf] — eess.IV; physics.bio-ph; physics.optics
Title: DeepRegularizer: Rapid Resolution Enhancement of Tomographic Imaging using Deep Learning
Authors: DongHun Ryu, Dongmin Ryu, YoonSeok Baek, Hyungjoo Cho, Geon Kim, Young Seo Kim, Yongki Lee, Yoosik Kim, Jong Chul Ye, Hyun-Seok Min, YongKeun Park
Abstract: Optical diffraction tomography measures the three-dimensional refractive index map of a specimen and visualizes biochemical phenomena at the nanoscale in a non-destructive manner. One major drawback of optical diffraction tomography is poor axial resolution due to limited access to the three-dimensional optical transfer function. This missing-cone problem has been addressed with regularization algorithms that use a priori information, such as non-negativity and sample smoothness, but the iterative nature of these algorithms and their parameter dependency make real-time visualization impossible. In this article, we propose and experimentally demonstrate a deep neural network, which we term DeepRegularizer, that rapidly improves the resolution of a three-dimensional refractive index map. Trained on pairs of datasets (a raw refractive index tomogram and a resolution-enhanced refractive index tomogram obtained via the iterative total variation algorithm), the three-dimensional U-net-based convolutional neural network learns a transformation between the two tomogram domains. The feasibility and generalizability of the network are demonstrated using bacterial cells and a human leukaemic cell line, and by validating the model across different samples. DeepRegularizer offers more than an order of magnitude faster regularization performance compared to the conventional iterative method. We envision that the proposed data-driven approach can bypass the high time complexity of various image reconstructions in other imaging modalities.
Submitted 29 September, 2020; originally announced September 2020.
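As a rough sketch of the training setup described above (a 3D network learning to map raw tomograms to TV-regularized ones), the snippet below uses a tiny residual 3D encoder-decoder and an L1 loss. The real model is a 3D U-Net; the architecture, loss, and sizes here are illustrative assumptions only.

```python
# Minimal sketch (assumption-laden, not the paper's model): a tiny 3D
# encoder-decoder standing in for the 3D U-Net, trained to map raw refractive
# index tomograms to their TV-regularized counterparts with an L1 loss.
import torch
import torch.nn as nn

class TinyRegularizer3D(nn.Module):
    def __init__(self, ch=16):
        super().__init__()
        self.enc = nn.Sequential(
            nn.Conv3d(1, ch, 3, padding=1), nn.ReLU(),
            nn.Conv3d(ch, ch, 3, stride=2, padding=1), nn.ReLU(),  # downsample
        )
        self.dec = nn.Sequential(
            nn.ConvTranspose3d(ch, ch, 2, stride=2), nn.ReLU(),    # upsample back
            nn.Conv3d(ch, 1, 3, padding=1),
        )

    def forward(self, x):
        return self.dec(self.enc(x)) + x   # residual: predict the correction only

model = TinyRegularizer3D()
opt = torch.optim.Adam(model.parameters(), lr=1e-3)
raw = torch.randn(2, 1, 32, 32, 32)        # hypothetical raw tomogram batch
target = torch.randn(2, 1, 32, 32, 32)     # hypothetical TV-regularized target
loss = nn.functional.l1_loss(model(raw), target)
loss.backward(); opt.step()
```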
arXiv:2009.09282 (https://arxiv.org/abs/2009.09282) [pdf, other] — eess.IV; cs.CV; cs.LG
Title: Reducing false-positive biopsies with deep neural networks that utilize local and global information in screening mammograms
Authors: Nan Wu, Zhe Huang, Yiqiu Shen, Jungkyu Park, Jason Phang, Taro Makino, S. Gene Kim, Kyunghyun Cho, Laura Heacock, Linda Moy, Krzysztof J. Geras
Abstract: Breast cancer is the most common cancer in women, and hundreds of thousands of unnecessary biopsies are performed around the world at tremendous cost. It is crucial to reduce the rate of biopsies that turn out to be benign tissue.
In this study, we build deep neural networks (DNNs) to classify biopsied lesions as malignant or benign, with the goal of using these networks as second readers serving radiologists to further reduce the number of false-positive findings. We enhance the performance of DNNs trained to learn from small image patches by integrating global context, provided in the form of saliency maps learned from the entire image, into their reasoning, similar to how radiologists consider global context when evaluating areas of interest. Our experiments are conducted on a dataset of 229,426 screening mammography exams from 141,473 patients. We achieve an AUC of 0.8 on a test set consisting of 464 benign and 136 malignant lesions.
Submitted 19 September, 2020; originally announced September 2020.

arXiv:2008.12493 (https://arxiv.org/abs/2008.12493) [pdf, other] — eess.IV; cs.CV
Title: DALE: Dark Region-Aware Low-light Image Enhancement
Authors: Dokyeong Kwon, Guisik Kim, Junseok Kwon
Abstract: In this paper, we present a novel low-light image enhancement method called dark region-aware low-light image enhancement (DALE), in which dark regions are accurately recognized by the proposed visual attention module and their brightness is intensively enhanced. Our method can estimate visual attention efficiently using super-pixels, without any complicated process. Thus, the method preserves the color, tone, and brightness of the original images and prevents normally illuminated areas from becoming saturated or distorted. Experimental results show that our method accurately identifies dark regions via the proposed visual attention, and qualitatively and quantitatively outperforms state-of-the-art methods.
Submitted 28 August, 2020; originally announced August 2020.
Comments: 12 pages, 7 figures, The 31st British Machine Vision Conference
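A super-pixel-based darkness attention map, as hinted at in the DALE abstract, could be estimated along the following lines. This is only an assumption-level sketch using SLIC super-pixels from scikit-image; the actual DALE attention module is learned and more involved.

```python
# Illustrative sketch (not the DALE code): estimate a per-pixel "darkness
# attention" map by averaging brightness inside each SLIC super-pixel and
# attending more strongly to darker segments.
import numpy as np
from skimage.segmentation import slic

def darkness_attention(rgb: np.ndarray, n_segments: int = 200) -> np.ndarray:
    """rgb: float image in [0, 1], shape (H, W, 3). Returns attention in [0, 1]."""
    segments = slic(rgb, n_segments=n_segments, compactness=10, start_label=0)
    luminance = rgb.mean(axis=2)
    attention = np.zeros_like(luminance)
    for label in np.unique(segments):
        mask = segments == label
        attention[mask] = 1.0 - luminance[mask].mean()  # darker segment -> higher attention
    return np.clip(attention, 0.0, 1.0)

attn = darkness_attention(np.random.rand(120, 160, 3))
```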
arXiv:2008.01950 (https://arxiv.org/abs/2008.01950) [pdf] — eess.SP; cs.NE
Title: Area-wide traffic signal control based on a deep graph Q-Network (DGQN) trained in an asynchronous manner
Authors: Gyeongjun Kim, Keemin Sohn
Abstract: Reinforcement learning (RL) algorithms have been widely applied in traffic signal studies. There are, however, several problems in jointly controlling traffic lights for a large transportation network. First, the action space explodes exponentially as the number of jointly controlled intersections increases.
Although a multi-agent RL algorithm has been used to address this curse of dimensionality, it neither guaranteed a global optimum nor could it break ties between joint actions. The problem was circumvented by revising the output structure of a deep Q-network (DQN) within the framework of a single-agent RL algorithm. Second, when mapping traffic states to action values, it is difficult to account for spatio-temporal correlations over a large transportation network; a deep graph Q-network (DGQN) was devised to efficiently accommodate spatio-temporal dependencies at scale. Finally, training an RL model to jointly control traffic lights in a large transportation network requires much time to converge, so an asynchronous update methodology was devised for the DGQN to reach an optimal policy quickly. Using these three remedies, the DGQN succeeded in jointly controlling the traffic lights in a large transportation network in Seoul, outperforming other state-of-the-art RL algorithms as well as an actual fixed-signal operation.
Submitted 5 August, 2020; originally announced August 2020.
Comments: 34 pages, 10 figures, and 4 tables
MSC Class: 68T05 (Primary); ACM Class: I.2.6
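The abstract says the DQN output structure was revised so the action space does not explode with the number of intersections, without spelling out the exact form. One common way to achieve that is a factorized output with one Q-value head per intersection, shown below as an illustrative sketch only (not the paper's network).

```python
# Illustrative sketch only: factorize the joint action space with one Q-value
# head per intersection, so the output grows linearly rather than exponentially
# in the number of jointly controlled intersections.
import torch
import torch.nn as nn

class FactorizedDQN(nn.Module):
    def __init__(self, state_dim, n_intersections, n_phases, hidden=128):
        super().__init__()
        self.trunk = nn.Sequential(nn.Linear(state_dim, hidden), nn.ReLU())
        self.heads = nn.ModuleList(
            nn.Linear(hidden, n_phases) for _ in range(n_intersections)
        )

    def forward(self, state):
        h = self.trunk(state)
        return torch.stack([head(h) for head in self.heads], dim=1)  # (B, I, P)

q = FactorizedDQN(state_dim=64, n_intersections=10, n_phases=4)(torch.randn(2, 64))
greedy_joint_action = q.argmax(dim=-1)   # one phase choice per intersection
```

With 10 intersections and 4 phases each, the head outputs 40 values instead of enumerating 4^10 joint actions.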
arXiv:2005.01996 (https://arxiv.org/abs/2005.01996) [pdf, other] — eess.IV; cs.CV
Title: NTIRE 2020 Challenge on Real-World Image Super-Resolution: Methods and Results
Authors: Andreas Lugmayr, Martin Danelljan, Radu Timofte, Namhyuk Ahn, Dongwoon Bai, Jie Cai, Yun Cao, Junyang Chen, Kaihua Cheng, SeYoung Chun, Wei Deng, Mostafa El-Khamy, Chiu Man Ho, Xiaozhong Ji, Amin Kheradmand, Gwantae Kim, Hanseok Ko, Kanghyu Lee, Jungwon Lee, Hao Li, Ziluan Liu, Zhi-Song Liu, Shuai Liu, Yunhua Lu, Zibo Meng, et al. (21 additional authors not shown)
Abstract: This paper reviews the NTIRE 2020 challenge on real-world super-resolution, focusing on the participating methods and final results. The challenge addresses the real-world setting, where paired true high- and low-resolution images are unavailable.
For training, only one set of source input images is therefore provided, along with a set of unpaired high-quality target images. In Track 1: Image Processing Artifacts, the aim is to super-resolve images with synthetically generated image processing artifacts, which allows quantitative benchmarking of the approaches with respect to a ground-truth image. In Track 2: Smartphone Images, real low-quality smartphone images have to be super-resolved. In both tracks, the ultimate goal is to achieve the best perceptual quality, evaluated in a human study. This is the second challenge on the subject, following AIM 2019, and targets advancing the state of the art in super-resolution. To measure performance, we use the benchmark protocol from AIM 2019. In total, 22 teams competed in the final testing phase, demonstrating new and innovative solutions to the problem.
Submitted 5 May, 2020; originally announced May 2020.
arXiv:2005.01056 (https://arxiv.org/abs/2005.01056) [pdf, other] — eess.IV; cs.CV
Title: NTIRE 2020 Challenge on Perceptual Extreme Super-Resolution: Methods and Results
Authors: Kai Zhang, Shuhang Gu, Radu Timofte, Taizhang Shang, Qiuju Dai, Shengchen Zhu, Tong Yang, Yandong Guo, Younghyun Jo, Sejong Yang, Seon Joo Kim, Lin Zha, Jiande Jiang, Xinbo Gao, Wen Lu, Jing Liu, Kwangjin Yoon, Taegyun Jeon, Kazutoshi Akita, Takeru Ooba, Norimichi Ukita, Zhipeng Luo, Yuehan Yao, Zhenyu Xu, Dongliang He, et al. (38 additional authors not shown)
Abstract: This paper reviews the NTIRE 2020 challenge on perceptual extreme super-resolution, with a focus on the proposed solutions and results. The challenge task was to super-resolve an input image by a magnification factor of 16, based on a set of prior examples of low- and corresponding high-resolution images.
The goal is to obtain a network design capable of producing high-resolution results with the best perceptual quality and similarity to the ground truth. The track had 280 registered participants, and 19 teams submitted final results, which gauge the state of the art in single-image super-resolution.
Submitted 3 May, 2020; originally announced May 2020.
Comments: CVPRW 2020

arXiv:2004.03842 (https://arxiv.org/abs/2004.03842) [pdf, other] — cs.CV; cs.LG; cs.RO; eess.SP
Title: Multi-Head Attention based Probabilistic Vehicle Trajectory Prediction
Authors: Hayoung Kim, Dongchan Kim, Gihoon Kim, Jeongmin Cho, Kunsoo Huh
Abstract: This paper presents an online-capable deep learning model for probabilistic vehicle trajectory prediction. We propose a simple encoder-decoder architecture based on multi-head attention. The proposed model generates the distributions of the predicted trajectories for multiple vehicles in parallel.
Our approach to modeling the interactions can learn to attend to a few influential vehicles in an unsupervised manner, which can improve the interpretability of the network. Experiments using naturalistic highway trajectories show a clear improvement in positional error in both the longitudinal and lateral directions.
Submitted 4 July, 2020; v1 submitted 8 April, 2020; originally announced April 2020.
Comments: 6 pages, 5 figures, 2020 IEEE Intelligent Vehicles Symposium (IV)
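A minimal sketch of the general architecture described above (a shared encoder, multi-head attention over vehicles, and parallel probabilistic outputs) might look as follows. All sizes, the GRU history encoder, and the Gaussian output head are assumptions for illustration, not the authors' design.

```python
# Minimal sketch under assumptions: encode each vehicle's history with a shared
# GRU, model interactions with multi-head self-attention across vehicles, and
# output a Gaussian (mean, log-std) over future positions for every vehicle.
import torch
import torch.nn as nn

class AttentionTrajectoryPredictor(nn.Module):
    def __init__(self, d_model=64, nhead=4, horizon=12):
        super().__init__()
        self.horizon = horizon
        self.encoder = nn.GRU(input_size=2, hidden_size=d_model, batch_first=True)
        self.attn = nn.MultiheadAttention(d_model, nhead, batch_first=True)
        self.out = nn.Linear(d_model, horizon * 4)   # per step: mean_xy + log_std_xy

    def forward(self, history):
        # history: (batch, n_vehicles, T_past, 2) -> one embedding per vehicle
        b, n, t, _ = history.shape
        _, h = self.encoder(history.reshape(b * n, t, 2))
        emb = h[-1].reshape(b, n, -1)                  # (b, n, d_model)
        ctx, _ = self.attn(emb, emb, emb)              # interactions across vehicles
        params = self.out(ctx).reshape(b, n, self.horizon, 4)
        mean, log_std = params[..., :2], params[..., 2:]
        return mean, log_std

mean, log_std = AttentionTrajectoryPredictor()(torch.randn(2, 5, 10, 2))
```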
arXiv:2002.07613 (https://arxiv.org/abs/2002.07613) [pdf, other] — cs.CV; cs.LG; eess.IV; stat.ML
Title: An interpretable classifier for high-resolution breast cancer screening images utilizing weakly supervised localization
Authors: Yiqiu Shen, Nan Wu, Jason Phang, Jungkyu Park, Kangning Liu, Sudarshini Tyagi, Laura Heacock, S. Gene Kim, Linda Moy, Kyunghyun Cho, Krzysztof J. Geras
Abstract: Medical images differ from natural images in their significantly higher resolutions and smaller regions of interest. Because of these differences, neural network architectures that work well for natural images might not be applicable to medical image analysis. In this work, we extend the globally-aware multiple instance classifier, a framework we proposed to address these unique properties of medical images. The model first uses a low-capacity, yet memory-efficient, network on the whole image to identify the most informative regions. It then applies a higher-capacity network to collect details from the chosen regions. Finally, it employs a fusion module that aggregates global and local information to make a final prediction. While existing methods often require lesion segmentation during training, our model is trained with only image-level labels and can generate pixel-level saliency maps indicating possible malignant findings. We apply the model to screening mammography interpretation: predicting the presence or absence of benign and malignant lesions. On the NYU Breast Cancer Screening Dataset, consisting of more than one million images, our model achieves an AUC of 0.93 in classifying breasts with malignant findings, outperforming ResNet-34 and Faster R-CNN. Compared to ResNet-34, our model is 4.1x faster for inference while using 78.4% less GPU memory. Furthermore, we demonstrate in a reader study that our model surpasses radiologist-level AUC by a margin of 0.11. The proposed model is available online: https://github.com/nyukat/GMIC.
Submitted 13 February, 2020; originally announced February 2020.
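The three-stage structure described above (global network, salient region selection, local network, fusion) can be sketched as follows; this is a hypothetical toy version, not the GMIC code released at the linked repository.

```python
# Hypothetical sketch of the two-stage idea described above: a low-capacity
# global network produces a saliency map, the most salient patches are cropped
# for a higher-capacity local network, and a fusion layer combines both views.
import torch
import torch.nn as nn
import torch.nn.functional as F

class TwoStageClassifier(nn.Module):
    def __init__(self, patch=32, k=3):
        super().__init__()
        self.patch, self.k = patch, k
        self.global_net = nn.Sequential(nn.Conv2d(1, 8, 5, stride=4, padding=2), nn.ReLU(),
                                        nn.Conv2d(8, 1, 1))            # coarse saliency logits
        self.local_net = nn.Sequential(nn.Conv2d(1, 16, 3, padding=1), nn.ReLU(),
                                       nn.AdaptiveAvgPool2d(1), nn.Flatten(),
                                       nn.Linear(16, 8))
        self.fuse = nn.Linear(1 + 8, 1)

    def forward(self, x):                      # x: (B, 1, H, W); B = 1 for brevity
        sal = self.global_net(x)               # (B, 1, h, w) saliency map
        global_score = sal.amax(dim=(2, 3))    # strongest global evidence
        up = F.interpolate(sal, size=x.shape[-2:], mode="bilinear", align_corners=False)
        top_idx = up.flatten(2).topk(self.k, dim=2).indices[0, 0]       # top-k pixel indices
        local_feats = []
        for idx in top_idx.tolist():           # crop a patch around each salient location
            cy, cx = divmod(idx, x.shape[-1])
            y0 = max(0, min(cy - self.patch // 2, x.shape[-2] - self.patch))
            x0 = max(0, min(cx - self.patch // 2, x.shape[-1] - self.patch))
            local_feats.append(self.local_net(x[:, :, y0:y0 + self.patch, x0:x0 + self.patch]))
        local = torch.stack(local_feats).mean(dim=0)                    # aggregate patch features
        return self.fuse(torch.cat([global_score, local], dim=1))       # fused malignancy logit

logit = TwoStageClassifier()(torch.randn(1, 1, 256, 256))
```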
arXiv:1912.11027 (https://arxiv.org/abs/1912.11027) [pdf, other] — eess.IV; cs.CV; cs.LG
Title: Robust breast cancer detection in mammography and digital breast tomosynthesis using annotation-efficient deep learning approach
Authors: William Lotter, Abdul Rahman Diab, Bryan Haslam, Jiye G. Kim, Giorgia Grisot, Eric Wu, Kevin Wu, Jorge Onieva Onieva, Jerrold L. Boxerman, Meiyun Wang, Mack Bandler, Gopal Vijayaraghavan, A. Gregory Sorensen
Abstract: Breast cancer remains a global challenge, causing over 1 million deaths globally in 2018. To achieve earlier breast cancer detection, screening x-ray mammography is recommended by health organizations worldwide and has been estimated to decrease breast cancer mortality by 20-40%. Nevertheless, significant false-positive and false-negative rates, as well as high interpretation costs, leave opportunities for improving quality and access.
To address these limitations, there has been much recent interest in applying deep learning to mammography; however, obtaining large amounts of annotated data poses a challenge for training deep learning models for this purpose, as does ensuring generalization beyond the populations represented in the training dataset. Here, we present an annotation-efficient deep learning approach that 1) achieves state-of-the-art performance in mammogram classification, 2) successfully extends to digital breast tomosynthesis (DBT; "3D mammography"), 3) detects cancers in clinically negative prior mammograms of cancer patients, 4) generalizes well to a population with low screening rates, and 5) outperforms five out of five full-time breast imaging specialists by improving absolute sensitivity by an average of 14%. Our results demonstrate promise towards software that can improve the accuracy of and access to screening mammography worldwide.
Submitted 27 December, 2019; v1 submitted 23 December, 2019; originally announced December 2019.
role="presentation"><title>subscribe to arXiv mailings</title><desc>Click here to subscribe</desc><path d="M476 3.2L12.5 270.6c-18.1 10.4-15.8 35.6 2.2 43.2L121 358.4l287.3-253.2c5.5-4.9 13.3 2.6 8.6 8.3L176 407v80.5c0 23.6 28.5 32.9 42.5 15.8L282 426l124.6 52.2c14.2 6 30.4-2.9 33-18.2l72-432C515 7.8 493.3-6.8 476 3.2z"/></svg> <a href="https://info.arxiv.org/help/subscribe"> Subscribe</a> </li> </ul> </div> </div> </div> <!-- end MetaColumn 1 --> <!-- MetaColumn 2 --> <div class="column"> <div class="columns"> <div class="column"> <ul class="nav-spaced"> <li><a href="https://info.arxiv.org/help/license/index.html">Copyright</a></li> <li><a href="https://info.arxiv.org/help/policies/privacy_policy.html">Privacy Policy</a></li> </ul> </div> <div class="column sorry-app-links"> <ul class="nav-spaced"> <li><a href="https://info.arxiv.org/help/web_accessibility.html">Web Accessibility Assistance</a></li> <li> <p class="help"> <a class="a11y-main-link" href="https://status.arxiv.org" target="_blank">arXiv Operational Status <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 256 512" class="icon filter-dark_grey" role="presentation"><path d="M224.3 273l-136 136c-9.4 9.4-24.6 9.4-33.9 0l-22.6-22.6c-9.4-9.4-9.4-24.6 0-33.9l96.4-96.4-96.4-96.4c-9.4-9.4-9.4-24.6 0-33.9L54.3 103c9.4-9.4 24.6-9.4 33.9 0l136 136c9.5 9.4 9.5 24.6.1 34z"/></svg></a><br> Get status notifications via <a class="is-link" href="https://subscribe.sorryapp.com/24846f03/email/new" target="_blank"><svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 512 512" class="icon filter-black" role="presentation"><path d="M502.3 190.8c3.9-3.1 9.7-.2 9.7 4.7V400c0 26.5-21.5 48-48 48H48c-26.5 0-48-21.5-48-48V195.6c0-5 5.7-7.8 9.7-4.7 22.4 17.4 52.1 39.5 154.1 113.6 21.1 15.4 56.7 47.8 92.2 47.6 35.7.3 72-32.8 92.3-47.6 102-74.1 131.6-96.3 154-113.7zM256 320c23.2.4 56.6-29.2 73.4-41.4 132.7-96.3 142.8-104.7 173.4-128.7 5.8-4.5 9.2-11.5 9.2-18.9v-19c0-26.5-21.5-48-48-48H48C21.5 64 0 85.5 0 112v19c0 7.4 3.4 14.3 9.2 18.9 30.6 23.9 40.7 32.4 173.4 128.7 16.8 12.2 50.2 41.8 73.4 41.4z"/></svg>email</a> or <a class="is-link" href="https://subscribe.sorryapp.com/24846f03/slack/new" target="_blank"><svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 448 512" class="icon filter-black" role="presentation"><path d="M94.12 315.1c0 25.9-21.16 47.06-47.06 47.06S0 341 0 315.1c0-25.9 21.16-47.06 47.06-47.06h47.06v47.06zm23.72 0c0-25.9 21.16-47.06 47.06-47.06s47.06 21.16 47.06 47.06v117.84c0 25.9-21.16 47.06-47.06 47.06s-47.06-21.16-47.06-47.06V315.1zm47.06-188.98c-25.9 0-47.06-21.16-47.06-47.06S139 32 164.9 32s47.06 21.16 47.06 47.06v47.06H164.9zm0 23.72c25.9 0 47.06 21.16 47.06 47.06s-21.16 47.06-47.06 47.06H47.06C21.16 243.96 0 222.8 0 196.9s21.16-47.06 47.06-47.06H164.9zm188.98 47.06c0-25.9 21.16-47.06 47.06-47.06 25.9 0 47.06 21.16 47.06 47.06s-21.16 47.06-47.06 47.06h-47.06V196.9zm-23.72 0c0 25.9-21.16 47.06-47.06 47.06-25.9 0-47.06-21.16-47.06-47.06V79.06c0-25.9 21.16-47.06 47.06-47.06 25.9 0 47.06 21.16 47.06 47.06V196.9zM283.1 385.88c25.9 0 47.06 21.16 47.06 47.06 0 25.9-21.16 47.06-47.06 47.06-25.9 0-47.06-21.16-47.06-47.06v-47.06h47.06zm0-23.72c-25.9 0-47.06-21.16-47.06-47.06 0-25.9 21.16-47.06 47.06-47.06h117.84c25.9 0 47.06 21.16 47.06 47.06 0 25.9-21.16 47.06-47.06 47.06H283.1z"/></svg>slack</a> </p> </li> </ul> </div> </div> </div> <!-- end MetaColumn 2 --> </div> </footer> <script src="https://static.arxiv.org/static/base/1.0.0a5/js/member_acknowledgement.js"></script> </body> </html>