Search | arXiv e-print repository

<a href="https://info.arxiv.org/about/donate.html">Donate</a></span></p></div> </div> <!-- contains arXiv identity and search bar --> <div class="identity level is-marginless"> <div class="level-left"> <div class="level-item"> <a class="arxiv" href="https://arxiv.org/" aria-label="arxiv-logo"> <img src="https://static.arxiv.org/static/base/1.0.0a5/images/arxiv-logo-one-color-white.svg" aria-label="logo" alt="arxiv logo" width="85" style="width:85px;"/> </a> </div> </div> <div class="search-block level-right"> <form class="level-item mini-search" method="GET" action="https://arxiv.org/search"> <div class="field has-addons"> <div class="control"> <input class="input is-small" type="text" name="query" placeholder="Search..." aria-label="Search term or terms" /> <p class="help"><a href="https://info.arxiv.org/help">Help</a> | <a href="https://arxiv.org/search/advanced">Advanced Search</a></p> </div> <div class="control"> <div class="select is-small"> <select name="searchtype" aria-label="Field to search"> <option value="all" selected="selected">All fields</option> <option value="title">Title</option> <option value="author">Author</option> <option value="abstract">Abstract</option> <option value="comments">Comments</option> <option value="journal_ref">Journal reference</option> <option value="acm_class">ACM classification</option> <option value="msc_class">MSC classification</option> <option value="report_num">Report number</option> <option value="paper_id">arXiv identifier</option> <option value="doi">DOI</option> <option value="orcid">ORCID</option> <option value="author_id">arXiv author ID</option> <option value="help">Help pages</option> <option value="full_text">Full text</option> </select> </div> </div> <input type="hidden" name="source" value="header"> <button class="button is-small is-cul-darker">Search</button> </div> </form> </div> </div> <!-- closes identity --> <div class="container"> <div class="user-tools is-size-7 has-text-right has-text-weight-bold" role="navigation" aria-label="User menu"> <a href="https://arxiv.org/login">Login</a> </div> </div> </header> <main class="container" id="main-container"> <div class="level is-marginless"> <div class="level-left"> <h1 class="title is-clearfix"> Showing 1&ndash;50 of 114 results for author: <span class="mathjax">Jung, Y</span> </h1> </div> <div class="level-right is-hidden-mobile"> <!-- feedback for mobile is moved to footer --> <span class="help" style="display: inline-block;"><a href="https://github.com/arXiv/arxiv-search/releases">Search v0.5.6 released 2020-02-24</a>&nbsp;&nbsp;</span> </div> </div> <div class="content"> <form method="GET" action="/search/cs" aria-role="search"> Searching in archive <strong>cs</strong>. <a href="/search/?searchtype=author&amp;query=Jung%2C+Y">Search in all archives.</a> <div class="field has-addons-tablet"> <div class="control is-expanded"> <label for="query" class="hidden-label">Search term or terms</label> <input class="input is-medium" id="query" name="query" placeholder="Search term..." 
type="text" value="Jung, Y"> </div> <div class="select control is-medium"> <label class="is-hidden" for="searchtype">Field</label> <select class="is-medium" id="searchtype" name="searchtype"><option value="all">All fields</option><option value="title">Title</option><option selected value="author">Author(s)</option><option value="abstract">Abstract</option><option value="comments">Comments</option><option value="journal_ref">Journal reference</option><option value="acm_class">ACM classification</option><option value="msc_class">MSC classification</option><option value="report_num">Report number</option><option value="paper_id">arXiv identifier</option><option value="doi">DOI</option><option value="orcid">ORCID</option><option value="license">License (URI)</option><option value="author_id">arXiv author ID</option><option value="help">Help pages</option><option value="full_text">Full text</option></select> </div> <div class="control"> <button class="button is-link is-medium">Search</button> </div> </div> <div class="field"> <div class="control is-size-7"> <label class="radio"> <input checked id="abstracts-0" name="abstracts" type="radio" value="show"> Show abstracts </label> <label class="radio"> <input id="abstracts-1" name="abstracts" type="radio" value="hide"> Hide abstracts </label> </div> </div> <div class="is-clearfix" style="height: 2.5em"> <div class="is-pulled-right"> <a href="/search/advanced?terms-0-term=Jung%2C+Y&amp;terms-0-field=author&amp;size=50&amp;order=-announced_date_first">Advanced Search</a> </div> </div> <input type="hidden" name="order" value="-announced_date_first"> <input type="hidden" name="size" value="50"> </form> <div class="level breathe-horizontal"> <div class="level-left"> <form method="GET" action="/search/"> <div style="display: none;"> <select id="searchtype" name="searchtype"><option value="all">All fields</option><option value="title">Title</option><option selected value="author">Author(s)</option><option value="abstract">Abstract</option><option value="comments">Comments</option><option value="journal_ref">Journal reference</option><option value="acm_class">ACM classification</option><option value="msc_class">MSC classification</option><option value="report_num">Report number</option><option value="paper_id">arXiv identifier</option><option value="doi">DOI</option><option value="orcid">ORCID</option><option value="license">License (URI)</option><option value="author_id">arXiv author ID</option><option value="help">Help pages</option><option value="full_text">Full text</option></select> <input id="query" name="query" type="text" value="Jung, Y"> <ul id="abstracts"><li><input checked id="abstracts-0" name="abstracts" type="radio" value="show"> <label for="abstracts-0">Show abstracts</label></li><li><input id="abstracts-1" name="abstracts" type="radio" value="hide"> <label for="abstracts-1">Hide abstracts</label></li></ul> </div> <div class="box field is-grouped is-grouped-multiline level-item"> <div class="control"> <span class="select is-small"> <select id="size" name="size"><option value="25">25</option><option selected value="50">50</option><option value="100">100</option><option value="200">200</option></select> </span> <label for="size">results per page</label>. 
</div> <div class="control"> <label for="order">Sort results by</label> <span class="select is-small"> <select id="order" name="order"><option selected value="-announced_date_first">Announcement date (newest first)</option><option value="announced_date_first">Announcement date (oldest first)</option><option value="-submitted_date">Submission date (newest first)</option><option value="submitted_date">Submission date (oldest first)</option><option value="">Relevance</option></select> </span> </div> <div class="control"> <button class="button is-small is-link">Go</button> </div> </div> </form> </div> </div> <nav class="pagination is-small is-centered breathe-horizontal" role="navigation" aria-label="pagination"> <a href="" class="pagination-previous is-invisible">Previous </a> <a href="/search/?searchtype=author&amp;query=Jung%2C+Y&amp;start=50" class="pagination-next" >Next </a> <ul class="pagination-list"> <li> <a href="/search/?searchtype=author&amp;query=Jung%2C+Y&amp;start=0" class="pagination-link is-current" aria-label="Goto page 1">1 </a> </li> <li> <a href="/search/?searchtype=author&amp;query=Jung%2C+Y&amp;start=50" class="pagination-link " aria-label="Page 2" aria-current="page">2 </a> </li> <li> <a href="/search/?searchtype=author&amp;query=Jung%2C+Y&amp;start=100" class="pagination-link " aria-label="Page 3" aria-current="page">3 </a> </li> </ul> </nav> <ol class="breathe-horizontal" start="1"> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2411.06071">arXiv:2411.06071</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2411.06071">pdf</a>, <a href="https://arxiv.org/format/2411.06071">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> GlocalCLIP: Object-agnostic Global-Local Prompt Learning for Zero-shot Anomaly Detection </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Ham%2C+J">Jiyul Ham</a>, <a href="/search/cs?searchtype=author&amp;query=Jung%2C+Y">Yonggon Jung</a>, <a href="/search/cs?searchtype=author&amp;query=Baek%2C+J">Jun-Geol Baek</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2411.06071v1-abstract-short" style="display: inline;"> Zero-shot anomaly detection (ZSAD) is crucial for detecting abnormal patterns in target datasets without using training samples, specifically in scenarios where there are distributional differences between the target domain and training data or where data scarcity arises because of restricted access. 
Although recently pretrained vision-language models demonstrate strong zero-shot performance acros&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.06071v1-abstract-full').style.display = 'inline'; document.getElementById('2411.06071v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2411.06071v1-abstract-full" style="display: none;"> Zero-shot anomaly detection (ZSAD) is crucial for detecting abnormal patterns in target datasets without using training samples, specifically in scenarios where there are distributional differences between the target domain and training data or where data scarcity arises because of restricted access. Although recently pretrained vision-language models demonstrate strong zero-shot performance across various visual tasks, they focus on learning class semantics, which makes their direct application to ZSAD challenging. To address this scenario, we propose GlocalCLIP, which uniquely separates global and local prompts and jointly optimizes them. This approach enables the object-agnostic glocal semantic prompt design to effectively capture general normal and anomalous patterns without dependency on specific objects in the image. We refine the text prompts for more precise adjustments by utilizing deep-text prompt tuning in the text encoder. In the vision encoder, we apply V-V attention layers to capture detailed local image features. Finally, we introduce glocal contrastive learning to improve the complementary learning of global and local prompts, effectively detecting abnormal patterns across various domains. The generalization performance of GlocalCLIP in ZSAD was demonstrated on 15 real-world datasets from both the industrial and medical domains, achieving superior performance compared to existing methods. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.06071v1-abstract-full').style.display = 'none'; document.getElementById('2411.06071v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 9 November, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> November 2024. 
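As a rough illustration of the prompt-based scoring idea this abstract builds on (a minimal sketch, not the authors' implementation; embedding shapes and the temperature value are invented), an image embedding can be scored against a paired normal/anomalous text-prompt embedding:

```python
# Sketch: prompt-based zero-shot anomaly scoring. An image embedding is
# compared against "normal" and "anomalous" text-prompt embeddings, and the
# softmax over the two cosine similarities gives an anomaly probability.
import numpy as np

def anomaly_score(image_emb, normal_emb, anomal_emb, temperature=0.07):
    """Return P(anomalous) from cosine similarities to the two prompts."""
    def cos(a, b):
        return float(a @ b / (np.linalg.norm(a) * np.linalg.norm(b)))
    sims = np.array([cos(image_emb, normal_emb), cos(image_emb, anomal_emb)])
    probs = np.exp(sims / temperature)
    probs /= probs.sum()
    return probs[1]  # probability mass on the anomalous prompt

# toy usage with random vectors standing in for CLIP embeddings
rng = np.random.default_rng(0)
img, normal, anomal = rng.normal(size=(3, 512))
print(f"anomaly score: {anomaly_score(img, normal, anomal):.3f}")
```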
</p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">28 pages, 33 figures</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2411.00360">arXiv:2411.00360</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2411.00360">pdf</a>, <a href="https://arxiv.org/format/2411.00360">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> A Simple Remedy for Dataset Bias via Self-Influence: A Mislabeled Sample Perspective </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Jung%2C+Y">Yeonsung Jung</a>, <a href="/search/cs?searchtype=author&amp;query=Song%2C+J">Jaeyun Song</a>, <a href="/search/cs?searchtype=author&amp;query=Yang%2C+J+Y">June Yong Yang</a>, <a href="/search/cs?searchtype=author&amp;query=Kim%2C+J">Jin-Hwa Kim</a>, <a href="/search/cs?searchtype=author&amp;query=Kim%2C+S">Sung-Yub Kim</a>, <a href="/search/cs?searchtype=author&amp;query=Yang%2C+E">Eunho Yang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2411.00360v1-abstract-short" style="display: inline;"> Learning generalized models from biased data is an important undertaking toward fairness in deep learning. To address this issue, recent studies attempt to identify and leverage bias-conflicting samples free from spurious correlations without prior knowledge of bias or an unbiased set. However, spurious correlation remains an ongoing challenge, primarily due to the difficulty in precisely detectin&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.00360v1-abstract-full').style.display = 'inline'; document.getElementById('2411.00360v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2411.00360v1-abstract-full" style="display: none;"> Learning generalized models from biased data is an important undertaking toward fairness in deep learning. To address this issue, recent studies attempt to identify and leverage bias-conflicting samples free from spurious correlations without prior knowledge of bias or an unbiased set. However, spurious correlation remains an ongoing challenge, primarily due to the difficulty in precisely detecting these samples. In this paper, inspired by the similarities between mislabeled samples and bias-conflicting samples, we approach this challenge from a novel perspective of mislabeled sample detection. Specifically, we delve into Influence Function, one of the standard methods for mislabeled sample detection, for identifying bias-conflicting samples and propose a simple yet effective remedy for biased models by leveraging them. Through comprehensive analysis and experiments on diverse datasets, we demonstrate that our new perspective can boost the precision of detection and rectify biased models effectively. 
Furthermore, our approach is complementary to existing methods, showing performance improvement even when applied to models that have already undergone recent debiasing techniques. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.00360v1-abstract-full').style.display = 'none'; document.getElementById('2411.00360v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 1 November, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> November 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2410.15609">arXiv:2410.15609</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2410.15609">pdf</a>, <a href="https://arxiv.org/format/2410.15609">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Sound">cs.SD</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Audio and Speech Processing">eess.AS</span> </div> </div> <p class="title is-5 mathjax"> Interventional Speech Noise Injection for ASR Generalizable Spoken Language Understanding </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Jung%2C+Y">Yeonjoon Jung</a>, <a href="/search/cs?searchtype=author&amp;query=Lee%2C+J">Jaeseong Lee</a>, <a href="/search/cs?searchtype=author&amp;query=Choi%2C+S">Seungtaek Choi</a>, <a href="/search/cs?searchtype=author&amp;query=Lee%2C+D">Dohyeon Lee</a>, <a href="/search/cs?searchtype=author&amp;query=Kim%2C+M">Minsoo Kim</a>, <a href="/search/cs?searchtype=author&amp;query=Hwang%2C+S">Seung-won Hwang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2410.15609v1-abstract-short" style="display: inline;"> Recently, pre-trained language models (PLMs) have been increasingly adopted in spoken language understanding (SLU). However, automatic speech recognition (ASR) systems frequently produce inaccurate transcriptions, leading to noisy inputs for SLU models, which can significantly degrade their performance. To address this, our objective is to train SLU models to withstand ASR errors by exposing them&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.15609v1-abstract-full').style.display = 'inline'; document.getElementById('2410.15609v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2410.15609v1-abstract-full" style="display: none;"> Recently, pre-trained language models (PLMs) have been increasingly adopted in spoken language understanding (SLU). However, automatic speech recognition (ASR) systems frequently produce inaccurate transcriptions, leading to noisy inputs for SLU models, which can significantly degrade their performance. To address this, our objective is to train SLU models to withstand ASR errors by exposing them to noises commonly observed in ASR systems, referred to as ASR-plausible noises. 
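For readers unfamiliar with self-influence, here is a minimal sketch of the underlying idea under a common first-order simplification: a sample's influence on its own loss is approximated by its squared per-sample gradient norm, shown for logistic regression (this is an illustration of the general technique, not the paper's code; all names are invented):

```python
# Sketch: self-influence scoring to flag suspicious (mislabeled or
# bias-conflicting) samples. High score = the sample pulls hard on its
# own loss, a common symptom of label noise or spurious-feature conflict.
import numpy as np

def self_influence_scores(X, y, w):
    """Squared gradient norm of the per-sample log-loss w.r.t. weights w."""
    p = 1.0 / (1.0 + np.exp(-X @ w))   # predicted probabilities
    residual = (p - y)[:, None]        # dL/dlogit for each sample
    grads = residual * X               # per-sample gradients, shape (n, d)
    return (grads ** 2).sum(axis=1)    # higher => more suspicious

rng = np.random.default_rng(1)
X = rng.normal(size=(100, 5))
w_true = rng.normal(size=5)
y = (X @ w_true > 0).astype(float)
y[:5] = 1.0 - y[:5]                    # deliberately flip a few labels
scores = self_influence_scores(X, y, w_true)
print("top suspects:", np.argsort(scores)[-5:])  # flipped samples score high
```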
3. arXiv:2410.15609 [pdf, other] (cs.CL, cs.SD, eess.AS)
Interventional Speech Noise Injection for ASR Generalizable Spoken Language Understanding
Authors: Yeonjoon Jung, Jaeseong Lee, Seungtaek Choi, Dohyeon Lee, Minsoo Kim, Seung-won Hwang
Abstract: Recently, pre-trained language models (PLMs) have been increasingly adopted in spoken language understanding (SLU). However, automatic speech recognition (ASR) systems frequently produce inaccurate transcriptions, leading to noisy inputs for SLU models, which can significantly degrade their performance. To address this, our objective is to train SLU models to withstand ASR errors by exposing them to noises commonly observed in ASR systems, referred to as ASR-plausible noises. Speech noise injection (SNI) methods have pursued this objective by introducing ASR-plausible noises, but we argue that these methods are inherently biased towards specific ASR systems, or ASR-specific noises. In this work, we propose a novel and less biased augmentation method of introducing the noises that are plausible to any ASR system, by cutting off the non-causal effect of noises. Experimental results and analyses demonstrate the effectiveness of our proposed methods in enhancing the robustness and generalizability of SLU models against unseen ASR systems by introducing more diverse and plausible ASR noises in advance.
Submitted 20 October, 2024; originally announced October 2024.
Comments: 9 pages, 3 figures
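A toy sketch of what ASR-agnostic noise injection could look like, assuming a hand-written confusion set sampled uniformly rather than from any single ASR system's error statistics (the confusion pairs are invented for the example; this is not the paper's interventional method):

```python
# Sketch: corrupt a transcript with ASR-plausible word substitutions.
# Sampling uniformly over a confusion set, instead of from one ASR
# system's confusion matrix, avoids baking in ASR-specific noise.
import random

CONFUSIONS = {"to": ["two", "too"], "flight": ["fight", "light"], "book": ["look"]}

def inject_noise(words, rate=0.3, rng=random.Random(0)):
    out = []
    for w in words:
        if w in CONFUSIONS and rng.random() < rate:
            out.append(rng.choice(CONFUSIONS[w]))  # uniform over confusions
        else:
            out.append(w)
    return out

print(inject_noise("book a flight to boston".split()))
```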
4. arXiv:2410.11374 [pdf, other] (cs.CV, cs.AI)
Augmentation-Driven Metric for Balancing Preservation and Modification in Text-Guided Image Editing
Authors: Yoonjeon Kim, Soohyun Ryu, Yeonsung Jung, Hyunkoo Lee, Joowon Kim, June Yong Yang, Jaeryong Hwang, Eunho Yang
Abstract: The development of vision-language and generative models has significantly advanced text-guided image editing, which seeks preservation of core elements in the source image while implementing modifications based on the target text. However, in the absence of evaluation metrics specifically tailored for text-guided image editing, existing metrics are limited in balancing the consideration of preservation and modification. In particular, our analysis reveals that CLIPScore, the most commonly used metric, tends to favor modification and ignore core attributes to be preserved, resulting in inaccurate evaluations. To address this problem, we propose AugCLIP, which balances preservation and modification by estimating the representation of an ideal edited image that aligns with the target text with minimal alteration of the source image. We augment detailed textual descriptions of the source image and the target text using a multi-modal large language model, to model a hyperplane that separates CLIP space into source or target. The representation of the ideal edited image is an orthogonal projection of the source image onto the hyperplane, which encapsulates the relative importance of each attribute considering the interdependent relationships. Our extensive experiments on five benchmark datasets, encompassing a diverse range of editing scenarios, demonstrate that AugCLIP aligns remarkably well with human evaluation standards compared to existing metrics. The code for evaluation will be open-sourced to contribute to the community.
Submitted 15 October, 2024; originally announced October 2024.
Comments: Under review
5. arXiv:2410.05449 [pdf] (cs.HC)
Skin Controlled Electronic and Neuromorphic Tattoos
Authors: Dmitry Kireev, Nandu Koripally, Samuel Liu, Gabriella Coloyan Fleming, Philip Varkey, Joseph Belle, Sivasakthya Mohan, Sang Sub Han, Dong Xu, Yeonwoong Jung, Xiangfeng Duan, Jean Anne C. Incorvia, Deji Akinwande
Abstract: Wearable human activity sensors developed in the past decade show a distinct trend of becoming thinner and more imperceptible while retaining their electrical qualities, with graphene e-tattoos as the ultimate example. A persistent challenge in modern wearables, however, is signal degradation due to the distance between the sensor's recording site and the signal transmission medium. To address this, we propose here to directly utilize human skin as a signal transmission medium as well as using low-cost gel electrodes for rapid probing of 2D transistor-based wearables. We demonstrate that the hypodermis layer of the skin can effectively serve as an electrolyte, enabling electrical potential application to semiconducting films made from graphene and other 2D materials placed on top of the skin. Graphene transistor tattoos, when biased through the body, exhibit high charge carrier mobility (up to 6500 cm2V-1s-1), with MoS2 and PtSe2 transistors showing mobilities up to 30 cm2V-1s-1 and 1 cm2V-1s-1, respectively. Finally, by introducing a layer of Nafion to the device structure, we observed neuromorphic functionality, transforming these e-tattoos into neuromorphic bioelectronic devices controlled through the skin itself. The neuromorphic bioelectronic tattoos have the potential for developing self-aware and stand-alone smart wearables, crucial for understanding and improving overall human performance.
Submitted 7 October, 2024; originally announced October 2024.

6. arXiv:2410.03355 [pdf, other] (cs.CV, cs.AI)
LANTERN: Accelerating Visual Autoregressive Models with Relaxed Speculative Decoding
Authors: Doohyuk Jang, Sihwan Park, June Yong Yang, Yeonsung Jung, Jihun Yun, Souvik Kundu, Sung-Yub Kim, Eunho Yang
Abstract: Auto-Regressive (AR) models have recently gained prominence in image generation, often matching or even surpassing the performance of diffusion models. However, one major limitation of AR models is their sequential nature, which processes tokens one at a time, slowing down generation compared to models like GANs or diffusion-based methods that operate more efficiently. While speculative decoding has proven effective for accelerating LLMs by generating multiple tokens in a single forward pass, its application in visual AR models remains largely unexplored. In this work, we identify a challenge in this setting, which we term token selection ambiguity, wherein visual AR models frequently assign uniformly low probabilities to tokens, hampering the performance of speculative decoding. To overcome this challenge, we propose a relaxed acceptance condition referred to as LANTERN that leverages the interchangeability of tokens in latent space. This relaxation restores the effectiveness of speculative decoding in visual AR models by enabling more flexible use of candidate tokens that would otherwise be prematurely rejected. Furthermore, by incorporating a total variation distance bound, we ensure that these speed gains are achieved without significantly compromising image quality or semantic coherence. Experimental results demonstrate the efficacy of our method in providing a substantial speed-up over speculative decoding. Specifically, compared to a naïve application of the state-of-the-art speculative decoding, LANTERN increases speed-ups by 1.75× and 1.76×, as compared to greedy decoding and random sampling, respectively, when applied to LlamaGen, a contemporary visual AR model.
Submitted 4 October, 2024; originally announced October 2024.
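A minimal sketch of a relaxed acceptance test in the spirit the abstract describes: a drafted token is judged by target probability aggregated over its latent-space neighbors rather than by its own probability alone (the neighborhood rule and the total-variation-distance bound are simplified away; all names and sizes are invented):

```python
# Sketch: relaxed speculative-decoding acceptance using latent-space
# neighbors, so near-interchangeable visual tokens are not rejected
# just because probability mass is spread thinly among them.
import numpy as np

def accept_draft(token, p_target, p_draft, codebook, k=5, rng=None):
    rng = rng or np.random.default_rng()
    dists = np.linalg.norm(codebook - codebook[token], axis=1)
    neighbors = np.argsort(dists)[:k]            # token plus nearest codes
    ratio = p_target[neighbors].sum() / p_draft[token]
    return rng.random() < min(1.0, ratio)        # relaxed acceptance test

V, d = 1000, 16
rng = np.random.default_rng(3)
codebook = rng.normal(size=(V, d))               # stand-in VQ codebook
p_t = rng.dirichlet(np.ones(V))                  # target model distribution
p_d = rng.dirichlet(np.ones(V))                  # draft model distribution
print(accept_draft(token=42, p_target=p_t, p_draft=p_d, codebook=codebook, rng=rng))
```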
</p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2409.07467">arXiv:2409.07467</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2409.07467">pdf</a>, <a href="https://arxiv.org/format/2409.07467">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Sound">cs.SD</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Multimedia">cs.MM</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Audio and Speech Processing">eess.AS</span> </div> </div> <p class="title is-5 mathjax"> Flexible Control in Symbolic Music Generation via Musical Metadata </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Han%2C+S">Sangjun Han</a>, <a href="/search/cs?searchtype=author&amp;query=Ham%2C+J">Jiwon Ham</a>, <a href="/search/cs?searchtype=author&amp;query=Lee%2C+C">Chaeeun Lee</a>, <a href="/search/cs?searchtype=author&amp;query=Kim%2C+H">Heejin Kim</a>, <a href="/search/cs?searchtype=author&amp;query=Do%2C+S">Soojong Do</a>, <a href="/search/cs?searchtype=author&amp;query=Yi%2C+S">Sihyuk Yi</a>, <a href="/search/cs?searchtype=author&amp;query=Seo%2C+J">Jun Seo</a>, <a href="/search/cs?searchtype=author&amp;query=Kim%2C+S">Seoyoon Kim</a>, <a href="/search/cs?searchtype=author&amp;query=Jung%2C+Y">Yountae Jung</a>, <a href="/search/cs?searchtype=author&amp;query=Lim%2C+W">Woohyung Lim</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2409.07467v1-abstract-short" style="display: inline;"> In this work, we introduce the demonstration of symbolic music generation, focusing on providing short musical motifs that serve as the central theme of the narrative. For the generation, we adopt an autoregressive model which takes musical metadata as inputs and generates 4 bars of multitrack MIDI sequences. During training, we randomly drop tokens from the musical metadata to guarantee flexible&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2409.07467v1-abstract-full').style.display = 'inline'; document.getElementById('2409.07467v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2409.07467v1-abstract-full" style="display: none;"> In this work, we introduce the demonstration of symbolic music generation, focusing on providing short musical motifs that serve as the central theme of the narrative. For the generation, we adopt an autoregressive model which takes musical metadata as inputs and generates 4 bars of multitrack MIDI sequences. During training, we randomly drop tokens from the musical metadata to guarantee flexible control. It provides users with the freedom to select input types while maintaining generative performance, enabling greater flexibility in music composition. We validate the effectiveness of the strategy through experiments in terms of model capacity, musical fidelity, diversity, and controllability. Additionally, we scale up the model and compare it with other music generation model through a subjective test. Our results indicate its superiority in both control and music quality. We provide a URL link https://www.youtube.com/watch?v=-0drPrFJdMQ to our demonstration video. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2409.07467v1-abstract-full').style.display = 'none'; document.getElementById('2409.07467v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 28 August, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> September 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2409.00323">arXiv:2409.00323</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2409.00323">pdf</a>, <a href="https://arxiv.org/format/2409.00323">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Software Engineering">cs.SE</span> </div> <div class="is-inline-block" style="margin-left: 0.5rem"> <div class="tags has-addons"> <span class="tag is-dark is-size-7">doi</span> <span class="tag is-light is-size-7"><a class="" href="https://doi.org/10.13140/RG.2.2.25134.11847">10.13140/RG.2.2.25134.11847 <i class="fa fa-external-link" aria-hidden="true"></i></a></span> </div> </div> </div> <p class="title is-5 mathjax"> From Prediction to Application: Language Model-based Code Knowledge Tracing with Domain Adaptive Pre-Training and Automatic Feedback System with Pedagogical Prompting for Comprehensive Programming Education </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Lee%2C+U">Unggi Lee</a>, <a href="/search/cs?searchtype=author&amp;query=Bae%2C+J">Jiyeong Bae</a>, <a href="/search/cs?searchtype=author&amp;query=Jung%2C+Y">Yeonji Jung</a>, <a href="/search/cs?searchtype=author&amp;query=Kang%2C+M">Minji Kang</a>, <a href="/search/cs?searchtype=author&amp;query=Byun%2C+G">Gyuri Byun</a>, <a href="/search/cs?searchtype=author&amp;query=Lee%2C+Y">Yeonseo Lee</a>, <a href="/search/cs?searchtype=author&amp;query=Kim%2C+D">Dohee Kim</a>, <a href="/search/cs?searchtype=author&amp;query=Lee%2C+S">Sookbun Lee</a>, <a href="/search/cs?searchtype=author&amp;query=Park%2C+J">Jaekwon Park</a>, <a href="/search/cs?searchtype=author&amp;query=Ahn%2C+T">Taekyung Ahn</a>, <a href="/search/cs?searchtype=author&amp;query=Lee%2C+G">Gunho Lee</a>, <a href="/search/cs?searchtype=author&amp;query=Kim%2C+H">Hyeoncheol Kim</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2409.00323v1-abstract-short" style="display: inline;"> Knowledge Tracing (KT) is a critical component in online learning, but traditional approaches face limitations in interpretability and cross-domain adaptability. This paper introduces Language Model-based Code Knowledge Tracing (CodeLKT), an innovative application of Language model-based Knowledge Tracing (LKT) to programming education. 
8. arXiv:2409.00323 [pdf, other] (cs.CL, cs.SE) doi:10.13140/RG.2.2.25134.11847
From Prediction to Application: Language Model-based Code Knowledge Tracing with Domain Adaptive Pre-Training and Automatic Feedback System with Pedagogical Prompting for Comprehensive Programming Education
Authors: Unggi Lee, Jiyeong Bae, Yeonji Jung, Minji Kang, Gyuri Byun, Yeonseo Lee, Dohee Kim, Sookbun Lee, Jaekwon Park, Taekyung Ahn, Gunho Lee, Hyeoncheol Kim
Abstract: Knowledge Tracing (KT) is a critical component in online learning, but traditional approaches face limitations in interpretability and cross-domain adaptability. This paper introduces Language Model-based Code Knowledge Tracing (CodeLKT), an innovative application of Language model-based Knowledge Tracing (LKT) to programming education. CodeLKT leverages pre-trained language models to process learning data, demonstrating superior performance over existing KT and Code KT models. We explore Domain Adaptive Pre-Training (DAPT) and Task Adaptive Pre-Training (TAPT), showing enhanced performance in the coding domain and investigating cross-domain transfer between mathematics and coding. Additionally, we present a theoretically-informed integrated system combining CodeLKT with large language models to generate personalized, in-depth feedback to support students' programming learning. This work advances the field of Code Knowledge Tracing by expanding the knowledge base with a language model-based approach and offering practical implications for programming education through data-informed feedback.
Submitted 30 August, 2024; originally announced September 2024.
Comments: 9 pages, 2 figures

9. arXiv:2408.03541 [pdf, ps, other] (cs.CL, cs.AI)
EXAONE 3.0 7.8B Instruction Tuned Language Model
Authors: LG AI Research: Soyoung An, Kyunghoon Bae, Eunbi Choi, Stanley Jungkyu Choi, Yemuk Choi, Seokhee Hong, Yeonjung Hong, Junwon Hwang, Hyojin Jeon, Gerrard Jeongwon Jo, Hyunjik Jo, Jiyeon Jung, Yountae Jung, Euisoon Kim, Hyosang Kim, Joonkee Kim, Seonghwan Kim, Soyeon Kim, Sunkyoung Kim, Yireun Kim, Youchul Kim, Edward Hwayoung Lee, Haeju Lee, et al. (14 additional authors not shown)
Abstract: We introduce the EXAONE 3.0 instruction-tuned language model, the first open model in the family of Large Language Models (LLMs) developed by LG AI Research. Among different model sizes, we publicly release the 7.8B instruction-tuned model to promote open research and innovations. Through extensive evaluations across a wide range of public and in-house benchmarks, EXAONE 3.0 demonstrates highly competitive real-world performance with instruction-following capability against other state-of-the-art open models of similar size. Our comparative analysis shows that EXAONE 3.0 excels particularly in Korean, while achieving compelling performance across general tasks and complex reasoning. With its strong real-world effectiveness and bilingual proficiency, we hope that EXAONE keeps contributing to advancements in Expert AI. Our EXAONE 3.0 instruction-tuned model is available at https://huggingface.co/LGAI-EXAONE/EXAONE-3.0-7.8B-Instruct
Submitted 13 August, 2024; v1 submitted 7 August, 2024; originally announced August 2024.

10. arXiv:2408.00853 [pdf] (cs.RO)
Real-time Dexterous Telemanipulation with an End-Effect-Oriented Learning-based Approach
Authors: Haoyang Wang, He Bai, Xiaoli Zhang, Yunsik Jung, Michel Bowman, Lingfeng Tao
Abstract: Dexterous telemanipulation is crucial in advancing human-robot systems, especially in tasks requiring precise and safe manipulation. However, it faces significant challenges due to the physical differences between human and robotic hands, the dynamic interaction with objects, and the indirect control and perception of the remote environment. Current approaches predominantly focus on mapping the human hand onto robotic counterparts to replicate motions, which exhibits a critical oversight: it often neglects the physical interaction with objects and relegates the interaction burden to the human to adapt and make laborious adjustments in response to the indirect and counter-intuitive observation of the remote environment. This work develops an End-Effects-Oriented Learning-based Dexterous Telemanipulation (EFOLD) framework to address telemanipulation tasks. EFOLD models telemanipulation as a Markov Game, introducing multiple end-effect features to interpret the human operator's commands during interaction with objects. These features are used by a Deep Reinforcement Learning policy to control the robot and reproduce such end effects. EFOLD was evaluated with real human subjects and two end-effect extraction methods for controlling a virtual Shadow Robot Hand in telemanipulation tasks. EFOLD achieved real-time control capability with low command following latency (delay < 0.11 s) and highly accurate tracking (MSE < 0.084 rad).
Submitted 1 August, 2024; originally announced August 2024.
Comments: Accepted by IROS 2024
</p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Accepted by IROS 2024</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2407.02403">arXiv:2407.02403</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2407.02403">pdf</a>, <a href="https://arxiv.org/format/2407.02403">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> </div> </div> <p class="title is-5 mathjax"> Face Reconstruction Transfer Attack as Out-of-Distribution Generalization </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Jung%2C+Y+G">Yoon Gyo Jung</a>, <a href="/search/cs?searchtype=author&amp;query=Park%2C+J">Jaewoo Park</a>, <a href="/search/cs?searchtype=author&amp;query=Dong%2C+X">Xingbo Dong</a>, <a href="/search/cs?searchtype=author&amp;query=Park%2C+H">Hojin Park</a>, <a href="/search/cs?searchtype=author&amp;query=Teoh%2C+A+B+J">Andrew Beng Jin Teoh</a>, <a href="/search/cs?searchtype=author&amp;query=Camps%2C+O">Octavia Camps</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2407.02403v2-abstract-short" style="display: inline;"> Understanding the vulnerability of face recognition systems to malicious attacks is of critical importance. Previous works have focused on reconstructing face images that can penetrate a targeted verification system. Even in the white-box scenario, however, naively reconstructed images misrepresent the identity information, hence the attacks are easily neutralized once the face system is updated o&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2407.02403v2-abstract-full').style.display = 'inline'; document.getElementById('2407.02403v2-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2407.02403v2-abstract-full" style="display: none;"> Understanding the vulnerability of face recognition systems to malicious attacks is of critical importance. Previous works have focused on reconstructing face images that can penetrate a targeted verification system. Even in the white-box scenario, however, naively reconstructed images misrepresent the identity information, hence the attacks are easily neutralized once the face system is updated or changed. In this paper, we aim to reconstruct face images which are capable of transferring face attacks on unseen encoders. We term this problem as Face Reconstruction Transfer Attack (FRTA) and show that it can be formulated as an out-of-distribution (OOD) generalization problem. Inspired by its OOD nature, we propose to solve FRTA by Averaged Latent Search and Unsupervised Validation with pseudo target (ALSUV). To strengthen the reconstruction attack on OOD unseen encoders, ALSUV reconstructs the face by searching the latent of amortized generator StyleGAN2 through multiple latent optimization, latent optimization trajectory averaging, and unsupervised validation with a pseudo target. 
arXiv:2406.15664 [pdf, other] (https://arxiv.org/abs/2406.15664)
Subjects: stat.ML (Machine Learning); cs.LG (Machine Learning)
Title: Flat Posterior Does Matter For Bayesian Model Averaging
Authors: Sungjun Lim, Jeyoon Yeom, Sooyon Kim, Hoyoon Byun, Jinho Kang, Yohan Jung, Jiyoung Jung, Kyungwoo Song
Abstract: A Bayesian neural network (BNN) approximates the posterior distribution of model parameters and uses the posterior for prediction via Bayesian Model Averaging (BMA). The quality of the posterior approximation is critical for achieving accurate and robust predictions. Flatness in the loss landscape is known to be strongly associated with generalization performance, so it must be taken into account to improve the quality of the posterior approximation. In this work, we empirically demonstrate that BNNs often struggle to capture flatness. Moreover, we provide both experimental and theoretical evidence that BMA can be ineffective without ensuring flatness. To address this, we propose Sharpness-Aware Bayesian Model Averaging (SA-BMA), a novel optimizer that seeks flat posteriors by calculating divergence in the parameter space. SA-BMA aligns with the intrinsic nature of BNNs and generalizes existing sharpness-aware optimizers for DNNs. In addition, we suggest a Bayesian transfer learning scheme to efficiently leverage pre-trained DNNs. We validate the efficacy of SA-BMA in enhancing generalization performance on few-shot classification and under distribution shift by ensuring a flat posterior.
Submitted 21 October, 2024; v1 submitted 21 June, 2024; originally announced June 2024.
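For readers unfamiliar with the sharpness-aware family this abstract generalizes, here is a generic SAM-style flat-minimum step on a point estimate. It is only the flavor of the idea: SA-BMA itself works with divergences over posteriors, which this toy deterministic example does not model.

```python
# A generic SAM-style "flat minimum" step (not SA-BMA itself): ascend to a
# worst-case nearby weight vector, then descend using that gradient.
import torch

model = torch.nn.Linear(4, 1)
x, y = torch.randn(16, 4), torch.randn(16, 1)
loss_fn = torch.nn.MSELoss()
rho, lr = 0.05, 0.1

# 1) ascend to the worst-case nearby weights
loss = loss_fn(model(x), y)
grads = torch.autograd.grad(loss, list(model.parameters()))
norm = torch.sqrt(sum((g ** 2).sum() for g in grads))
eps = [rho * g / (norm + 1e-12) for g in grads]
with torch.no_grad():
    for p, e in zip(model.parameters(), eps):
        p.add_(e)

# 2) gradient at the perturbed point, then undo the perturbation and descend
loss_pert = loss_fn(model(x), y)
grads_pert = torch.autograd.grad(loss_pert, list(model.parameters()))
with torch.no_grad():
    for p, e, g in zip(model.parameters(), eps, grads_pert):
        p.sub_(e)          # back to the original weights
        p.sub_(lr * g)     # sharpness-aware update
```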
arXiv:2406.07923 [pdf, other] (https://arxiv.org/abs/2406.07923)
Subjects: cs.SD (Sound); cs.AI (Artificial Intelligence); eess.AS (Audio and Speech Processing)
DOI: 10.21437/Interspeech.2024
Title: CTC-aligned Audio-Text Embedding for Streaming Open-vocabulary Keyword Spotting
Authors: Sichen Jin, Youngmoon Jung, Seungjin Lee, Jaeyoung Roh, Changwoo Han, Hoonyoung Cho
Abstract: This paper introduces a novel approach for streaming open-vocabulary keyword spotting (KWS) with text-based keyword enrollment.
For every input frame, the proposed method finds the optimal alignment ending at the frame using connectionist temporal classification (CTC) and aggregates the frame-level acoustic embedding (AE) to obtain a higher-level (i.e., character, word, or phrase) AE that aligns with the text embedding (TE) of the target keyword text. After that, we calculate the similarity of the aggregated AE and the TE. To the best of our knowledge, this is the first attempt to dynamically align the audio and the keyword text on the fly to attain a joint audio-text embedding for KWS. Despite operating in a streaming fashion, our approach achieves competitive performance on the LibriPhrase dataset compared to non-streaming methods, with a mere 155K model parameters and a decoding algorithm with time complexity O(U), where U is the length of the target keyword at inference time.
Submitted 12 June, 2024; originally announced June 2024.
Journal ref: Proceedings of Interspeech 2024
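The aggregation-then-similarity step described above is easy to sketch once an alignment is in hand. In the toy below the CTC alignment is supplied as a plain list of frame indices per character; the shapes, the mean pooling, and the cosine score are illustrative assumptions rather than the paper's implementation.

```python
# Sketch of the aggregation step: pool frame-level acoustic embeddings over a
# given alignment, then score against the keyword's text embedding.
import numpy as np

def aggregate_and_score(frame_emb, alignment, text_emb):
    """frame_emb: (T, d) acoustic embeddings; alignment: list of frame-index
    lists, one per keyword character; text_emb: (d,) keyword text embedding."""
    char_embs = np.stack([frame_emb[idx].mean(axis=0) for idx in alignment])
    keyword_emb = char_embs.mean(axis=0)           # character -> phrase level
    cos = keyword_emb @ text_emb / (
        np.linalg.norm(keyword_emb) * np.linalg.norm(text_emb) + 1e-9)
    return cos

T, d = 40, 16
score = aggregate_and_score(np.random.randn(T, d),
                            [[3, 4], [10, 11, 12], [20]],   # toy CTC alignment
                            np.random.randn(d))
```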
</p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Journal ref:</span> Proceedings of Interspeech 2024 </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2406.05314">arXiv:2406.05314</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2406.05314">pdf</a>, <a href="https://arxiv.org/format/2406.05314">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Audio and Speech Processing">eess.AS</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Signal Processing">eess.SP</span> </div> </div> <p class="title is-5 mathjax"> Relational Proxy Loss for Audio-Text based Keyword Spotting </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Jung%2C+Y">Youngmoon Jung</a>, <a href="/search/cs?searchtype=author&amp;query=Lee%2C+S">Seungjin Lee</a>, <a href="/search/cs?searchtype=author&amp;query=Yang%2C+J">Joon-Young Yang</a>, <a href="/search/cs?searchtype=author&amp;query=Roh%2C+J">Jaeyoung Roh</a>, <a href="/search/cs?searchtype=author&amp;query=Han%2C+C+W">Chang Woo Han</a>, <a href="/search/cs?searchtype=author&amp;query=Cho%2C+H">Hoon-Young Cho</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2406.05314v1-abstract-short" style="display: inline;"> In recent years, there has been an increasing focus on user convenience, leading to increased interest in text-based keyword enrollment systems for keyword spotting (KWS). Since the system utilizes text input during the enrollment phase and audio input during actual usage, we call this task audio-text based KWS. To enable this task, both acoustic and text encoders are typically trained using deep&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2406.05314v1-abstract-full').style.display = 'inline'; document.getElementById('2406.05314v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2406.05314v1-abstract-full" style="display: none;"> In recent years, there has been an increasing focus on user convenience, leading to increased interest in text-based keyword enrollment systems for keyword spotting (KWS). Since the system utilizes text input during the enrollment phase and audio input during actual usage, we call this task audio-text based KWS. To enable this task, both acoustic and text encoders are typically trained using deep metric learning loss functions, such as triplet- and proxy-based losses. This study aims to improve existing methods by leveraging the structural relations within acoustic embeddings and within text embeddings. Unlike previous studies that only compare acoustic and text embeddings on a point-to-point basis, our approach focuses on the relational structures within the embedding space by introducing the concept of Relational Proxy Loss (RPL). By incorporating RPL, we demonstrated improved performance on the Wall Street Journal (WSJ) corpus. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2406.05314v1-abstract-full').style.display = 'none'; document.getElementById('2406.05314v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 7 June, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> June 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">5 pages, 2 figures, Accepted by Interspeech 2024</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2406.00798">arXiv:2406.00798</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2406.00798">pdf</a>, <a href="https://arxiv.org/format/2406.00798">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> </div> </div> <p class="title is-5 mathjax"> PruNeRF: Segment-Centric Dataset Pruning via 3D Spatial Consistency </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Jung%2C+Y">Yeonsung Jung</a>, <a href="/search/cs?searchtype=author&amp;query=Yun%2C+H">Heecheol Yun</a>, <a href="/search/cs?searchtype=author&amp;query=Park%2C+J">Joonhyung Park</a>, <a href="/search/cs?searchtype=author&amp;query=Kim%2C+J">Jin-Hwa Kim</a>, <a href="/search/cs?searchtype=author&amp;query=Yang%2C+E">Eunho Yang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2406.00798v1-abstract-short" style="display: inline;"> Neural Radiance Fields (NeRF) have shown remarkable performance in learning 3D scenes. However, NeRF exhibits vulnerability when confronted with distractors in the training images -- unexpected objects are present only within specific views, such as moving entities like pedestrians or birds. Excluding distractors during dataset construction is a straightforward solution, but without prior knowledg&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2406.00798v1-abstract-full').style.display = 'inline'; document.getElementById('2406.00798v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2406.00798v1-abstract-full" style="display: none;"> Neural Radiance Fields (NeRF) have shown remarkable performance in learning 3D scenes. However, NeRF exhibits vulnerability when confronted with distractors in the training images -- unexpected objects are present only within specific views, such as moving entities like pedestrians or birds. Excluding distractors during dataset construction is a straightforward solution, but without prior knowledge of their types and quantities, it becomes prohibitively expensive. In this paper, we propose PruNeRF, a segment-centric dataset pruning framework via 3D spatial consistency, that effectively identifies and prunes the distractors. 
We first examine existing metrics for measuring pixel-wise distraction and introduce Influence Functions for more accurate measurements. Then, we assess 3D spatial consistency using a depth-based reprojection technique to obtain 3D-aware distraction. Furthermore, we incorporate segmentation for pixel-to-segment refinement, enabling more precise identification. Our experiments on benchmark datasets demonstrate that PruNeRF consistently outperforms state-of-the-art methods in robustness against distractors.
Submitted 2 June, 2024; originally announced June 2024.
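The depth-based reprojection check mentioned above can be made concrete with a pinhole-camera toy: warp a pixel from one view into another using its depth and flag it when the depths disagree. The camera model, the lookup interface, and any threshold are assumptions; the paper's pipeline additionally refines pixels to segments.

```python
# Toy depth-reprojection residual: a pixel whose depth disagrees after
# warping into another view is a distraction candidate (illustrative only).
import numpy as np

def reprojection_residual(uv, depth_a, K, T_ab, depth_b_lookup):
    """uv: pixel in view A; depth_a: its depth; K: 3x3 intrinsics;
    T_ab: 4x4 transform A->B; depth_b_lookup: fn(u, v) -> depth in view B."""
    p_a = depth_a * np.linalg.inv(K) @ np.array([uv[0], uv[1], 1.0])
    p_b = (T_ab @ np.append(p_a, 1.0))[:3]          # point in view B's frame
    uvb = K @ p_b
    u_b, v_b = uvb[0] / uvb[2], uvb[1] / uvb[2]     # reprojected pixel
    return abs(depth_b_lookup(u_b, v_b) - p_b[2])   # depth inconsistency

K = np.array([[500, 0, 320], [0, 500, 240], [0, 0, 1.0]])
T = np.eye(4); T[0, 3] = 0.1                        # small lateral shift
res = reprojection_residual((320, 240), 2.0, K, T, lambda u, v: 2.0)  # -> 0.0
```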
arXiv:2405.15092 [pdf, other] (https://arxiv.org/abs/2405.15092)
Subjects: cs.AI (Artificial Intelligence); cs.CL (Computation and Language)
Title: Dissociation of Faithful and Unfaithful Reasoning in LLMs
Authors: Evelyn Yee, Alice Li, Chenyu Tang, Yeon Ho Jung, Ramamohan Paturi, Leon Bergen
Abstract: Large language models (LLMs) often improve their performance in downstream tasks when they generate Chain of Thought reasoning text before producing an answer. We investigate how LLMs recover from errors in Chain of Thought. Through analysis of error recovery behaviors, we find evidence for unfaithfulness in Chain of Thought, which occurs when models arrive at the correct answer despite invalid reasoning text. We identify factors that shift LLM recovery behavior: LLMs recover more frequently from obvious errors and in contexts that provide more evidence for the correct answer. Critically, these factors have divergent effects on faithful and unfaithful recoveries. Our results indicate that there are distinct mechanisms driving faithful and unfaithful error recoveries. Selective targeting of these mechanisms may be able to drive down the rate of unfaithful reasoning and improve model interpretability.
Submitted 2 September, 2024; v1 submitted 23 May, 2024; originally announced May 2024.
Comments: code published at https://github.com/CoTErrorRecovery/CoTErrorRecovery
arXiv:2404.06808 [pdf, other] (https://arxiv.org/abs/2404.06808)
Subjects: cs.LG (Machine Learning)
Title: Formation-Controlled Dimensionality Reduction
Authors: Taeuk Jeong, Yoon Mo Jung
Abstract: Dimensionality reduction is the process of generating a low-dimensional representation of high-dimensional data. Motivated by the formation control of mobile agents, we propose a nonlinear dynamical system for dimensionality reduction. The system consists of two parts: the control of neighbor points, addressing local structures, and the control of remote points, accounting for global structures. We also include a brief mathematical observation of the model and its numerical procedure. Numerical experiments are performed on both synthetic and real datasets, and comparisons with existing models demonstrate the soundness and effectiveness of the proposed model.
Submitted 10 April, 2024; originally announced April 2024.
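The two-part dynamics described in this abstract admit a compact sketch: neighbor points are steered to reproduce their high-dimensional distances (local control), while non-neighbors simply repel (global control). The step size, neighborhood rule, and repulsion law below are assumptions, not the authors' model.

```python
# Minimal two-part "formation" update for dimensionality reduction
# (illustrative dynamics under assumed control laws).
import numpy as np

def formation_step(Y, D_high, neighbors, dt=0.05, repel=0.1):
    """Y: (n, 2) low-dim positions; D_high: (n, n) input-space distances;
    neighbors: boolean (n, n) adjacency marking local pairs."""
    n = Y.shape[0]
    V = np.zeros_like(Y)
    for i in range(n):
        for j in range(n):
            if i == j:
                continue
            diff = Y[j] - Y[i]
            d = np.linalg.norm(diff) + 1e-9
            if neighbors[i, j]:            # local control: match distances
                V[i] += (d - D_high[i, j]) * diff / d
            else:                          # global control: push apart
                V[i] -= repel * diff / d ** 2
    return Y + dt * V
```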
arXiv:2404.03138 [pdf, other] (https://arxiv.org/abs/2404.03138)
Subjects: cs.CV (Computer Vision and Pattern Recognition); cs.GR (Graphics)
Title: Discontinuity-preserving Normal Integration with Auxiliary Edges
Authors: Hyomin Kim, Yucheol Jung, Seungyong Lee
Abstract: Many surface reconstruction methods incorporate normal integration, which is a process to obtain a depth map from surface gradients. In this process, the input may represent a surface with discontinuities, e.g., due to self-occlusion. To reconstruct an accurate depth map from the input normal map, hidden surface gradients occurring from the jumps must be handled. To model these jumps correctly, we design a novel discretization scheme for the domain of normal integration. Our key idea is to introduce auxiliary edges, which bridge between piecewise-smooth patches in the domain so that the magnitude of hidden jumps can be explicitly expressed. Using the auxiliary edges, we design a novel algorithm to optimize the discontinuity and the depth map from the input normal map. Our method optimizes discontinuities by using a combination of iteratively re-weighted least squares and iterative filtering of the jump magnitudes on auxiliary edges to provide strong sparsity regularization. Compared to previous discontinuity-preserving normal integration methods, which model the magnitudes of jumps only implicitly, our method reconstructs subtle discontinuities accurately thanks to our explicit representation of jumps allowing for strong sparsity regularization.
Submitted 3 April, 2024; originally announced April 2024.
Comments: To appear at CVPR 2024. For supplementary video, see https://youtu.be/MTTcW5kAOFE
ACM Class: I.4.5
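The sparsity mechanism named above, iteratively re-weighted least squares (IRLS), is worth a small generic demo: reweighting turns an L1-type penalty into a sequence of easy weighted least-squares solves. The tiny problem below is illustrative only; the real method applies this to jump magnitudes on auxiliary edges.

```python
# Generic IRLS for an L1-type sparsity penalty (not the paper's full solver).
import numpy as np

def irls_sparse(A, b, lam=0.1, iters=30, eps=1e-6):
    """Approximately minimize ||A x - b||^2 + lam * ||x||_1 via reweighting."""
    x = np.linalg.lstsq(A, b, rcond=None)[0]
    for _ in range(iters):
        W = np.diag(1.0 / (np.abs(x) + eps))     # small |x_i| -> heavy penalty
        x = np.linalg.solve(A.T @ A + lam * W, A.T @ b)
    return x

A = np.random.randn(20, 10)
x_true = np.zeros(10); x_true[[2, 7]] = [1.5, -2.0]   # sparse ground truth
x_hat = irls_sparse(A, A @ x_true)                    # recovers a sparse x
```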
has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2404.02949v1-abstract-short" style="display: inline;"> Interpretability techniques are valuable for helping humans understand and oversee AI systems. The SaTML 2024 CNN Interpretability Competition solicited novel methods for studying convolutional neural networks (CNNs) at the ImageNet scale. The objective of the competition was to help human crowd-workers identify trojans in CNNs. This report showcases the methods and results of four featured compet&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2404.02949v1-abstract-full').style.display = 'inline'; document.getElementById('2404.02949v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2404.02949v1-abstract-full" style="display: none;"> Interpretability techniques are valuable for helping humans understand and oversee AI systems. The SaTML 2024 CNN Interpretability Competition solicited novel methods for studying convolutional neural networks (CNNs) at the ImageNet scale. The objective of the competition was to help human crowd-workers identify trojans in CNNs. This report showcases the methods and results of four featured competition entries. It remains challenging to help humans reliably diagnose trojans via interpretability tools. However, the competition&#39;s entries have contributed new techniques and set a new record on the benchmark from Casper et al., 2023. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2404.02949v1-abstract-full').style.display = 'none'; document.getElementById('2404.02949v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 3 April, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> April 2024. 
</p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Competition for SaTML 2024</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2403.03960">arXiv:2403.03960</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2403.03960">pdf</a>, <a href="https://arxiv.org/format/2403.03960">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Chemical Physics">physics.chem-ph</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> Assessing the Extrapolation Capability of Template-Free Retrosynthesis Models </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Chen%2C+S">Shuan Chen</a>, <a href="/search/cs?searchtype=author&amp;query=Jung%2C+Y">Yousung Jung</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2403.03960v1-abstract-short" style="display: inline;"> Despite the acknowledged capability of template-free models in exploring unseen reaction spaces compared to template-based models for retrosynthesis prediction, their ability to venture beyond established boundaries remains relatively uncharted. In this study, we empirically assess the extrapolation capability of state-of-the-art template-free models by meticulously assembling an extensive set of&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2403.03960v1-abstract-full').style.display = 'inline'; document.getElementById('2403.03960v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2403.03960v1-abstract-full" style="display: none;"> Despite the acknowledged capability of template-free models in exploring unseen reaction spaces compared to template-based models for retrosynthesis prediction, their ability to venture beyond established boundaries remains relatively uncharted. In this study, we empirically assess the extrapolation capability of state-of-the-art template-free models by meticulously assembling an extensive set of out-of-distribution (OOD) reactions. Our findings demonstrate that while template-free models exhibit potential in predicting precursors with novel synthesis rules, their top-10 exact-match accuracy in OOD reactions is strikingly modest (&lt; 1%). Furthermore, despite the capability of generating novel reactions, our investigation highlights a recurring issue where more than half of the novel reactions predicted by template-free models are chemically implausible. Consequently, we advocate for the future development of template-free models that integrate considerations of chemical feasibility when navigating unexplored regions of reaction space. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2403.03960v1-abstract-full').style.display = 'none'; document.getElementById('2403.03960v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 28 February, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> March 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2402.08601">arXiv:2402.08601</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2402.08601">pdf</a>, <a href="https://arxiv.org/format/2402.08601">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> Latent Inversion with Timestep-aware Sampling for Training-free Non-rigid Editing </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Jung%2C+Y">Yunji Jung</a>, <a href="/search/cs?searchtype=author&amp;query=Lee%2C+S">Seokju Lee</a>, <a href="/search/cs?searchtype=author&amp;query=Djanibekov%2C+T">Tair Djanibekov</a>, <a href="/search/cs?searchtype=author&amp;query=Shim%2C+H">Hyunjung Shim</a>, <a href="/search/cs?searchtype=author&amp;query=Ye%2C+J+C">Jong Chul Ye</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2402.08601v3-abstract-short" style="display: inline;"> Text-guided non-rigid editing involves complex edits for input images, such as changing motion or compositions within their surroundings. Since it requires manipulating the input structure, existing methods often struggle with preserving object identity and background, particularly when combined with Stable Diffusion. In this work, we propose a training-free approach for non-rigid editing with Sta&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2402.08601v3-abstract-full').style.display = 'inline'; document.getElementById('2402.08601v3-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2402.08601v3-abstract-full" style="display: none;"> Text-guided non-rigid editing involves complex edits for input images, such as changing motion or compositions within their surroundings. Since it requires manipulating the input structure, existing methods often struggle with preserving object identity and background, particularly when combined with Stable Diffusion. In this work, we propose a training-free approach for non-rigid editing with Stable Diffusion, aimed at improving the identity preservation quality without compromising editability. Our approach comprises three stages: text optimization, latent inversion, and timestep-aware text injection sampling. Inspired by the success of Imagic, we employ their text optimization for smooth editing. Then, we introduce latent inversion to preserve the input image&#39;s identity without additional model fine-tuning. To fully utilize the input reconstruction ability of latent inversion, we suggest timestep-aware text injection sampling. 
This effectively retains the structure of the input image by injecting the source text prompt in early sampling steps and then transitioning to the target prompt in subsequent sampling steps. This strategic approach seamlessly harmonizes with text optimization, facilitating complex non-rigid edits to the input without losing the original identity. We demonstrate the effectiveness of our method in terms of identity preservation, editability, and aesthetic quality through extensive experiments.
Submitted 16 October, 2024; v1 submitted 13 February, 2024; originally announced February 2024.
Comments: This manuscript has been submitted to Pattern Recognition Letters
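The prompt-switching schedule described here fits in a few lines. In the skeleton below, denoise_step is a placeholder for one reverse-diffusion step (not a real diffusers API), and the 30% switch point is an assumed hyperparameter.

```python
# Skeleton of timestep-aware text injection: use the source prompt during the
# early, high-noise steps, then switch to the target prompt.
def edit_with_injection(latent, denoise_step, timesteps,
                        src_prompt, tgt_prompt, switch_frac=0.3):
    n_switch = int(len(timesteps) * switch_frac)
    for i, t in enumerate(timesteps):              # e.g., 50 DDIM steps
        prompt = src_prompt if i < n_switch else tgt_prompt
        latent = denoise_step(latent, t, prompt)   # one reverse-diffusion step
    return latent
```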
arXiv:2402.05448 [pdf, other] (https://arxiv.org/abs/2402.05448)
Subjects: cs.CV (Computer Vision and Pattern Recognition); cs.AI (Artificial Intelligence); cs.GR (Graphics); cs.LG (Machine Learning); cs.MM (Multimedia)
Title: Minecraft-ify: Minecraft Style Image Generation with Text-guided Image Editing for In-Game Application
Authors: Bumsoo Kim, Sanghyun Byun, Yonghoon Jung, Wonseop Shin, Sareer UI Amin, Sanghyun Seo
Abstract: In this paper, we present Minecraft-ify, a character texture generation system specialized for the Minecraft video game and aimed at in-game application. The system can generate face-focused images for texture mapping tailored to 3D virtual characters with a cube manifold. While existing projects or works only generate textures, the proposed system can invert a user-provided real image, or generate an average or random appearance from the learned distribution. Moreover, the output can be manipulated with text guidance using StyleGAN and StyleCLIP. These features provide an extended user experience with greater freedom as a user-friendly AI tool. Project page: https://gh-bumsookim.github.io/Minecraft-ify/
Submitted 3 March, 2024; v1 submitted 8 February, 2024; originally announced February 2024.
Comments: 2 pages, 2 figures. Accepted as Spotlight to NeurIPS 2023 Workshop on Machine Learning for Creativity and Design
arXiv:2401.08998 [pdf, other] (https://arxiv.org/abs/2401.08998)
Subjects: cs.LG (Machine Learning); cs.CR (Cryptography and Security); cs.CV (Computer Vision and Pattern Recognition)
Title: Attack and Reset for Unlearning: Exploiting Adversarial Noise toward Machine Unlearning through Parameter Re-initialization
Authors: Yoonhwa Jung, Ikhyun Cho, Shun-Hsiang Hsu, Julia Hockenmaier
Abstract: With growing concerns surrounding privacy and regulatory compliance, the concept of machine unlearning has gained prominence, aiming to selectively forget or erase specific learned information from a trained model. In response to this critical need, we introduce a novel approach called Attack-and-Reset for Unlearning (ARU). This algorithm leverages meticulously crafted adversarial noise to generate a parameter mask, effectively resetting certain parameters and rendering them unlearnable. ARU outperforms current state-of-the-art results on two facial machine-unlearning benchmark datasets, MUFAC and MUCAC. In particular, we present the steps involved in attacking and masking that strategically filter and re-initialize network parameters biased towards the forget set. Our work represents a significant advancement in rendering data unexploitable to deep learning models through parameter re-initialization, achieved by harnessing adversarial noise to craft a mask.
Submitted 17 January, 2024; originally announced January 2024.
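Schematically, the attack-and-mask-and-reset loop can look like the sketch below: score parameters by an adversarially derived signal, mask the most forget-set-biased ones, and re-initialize them. The gradient-magnitude scoring rule, the quantile threshold, and the re-initialization scale are all assumptions, not ARU's actual procedure.

```python
# Schematic attack-and-reset: mask the parameters most implicated by an
# adversarially crafted loss, then re-initialize them (illustrative only).
import torch

def attack_and_reset(model, forget_loss, quantile=0.9):
    forget_loss.backward()                       # adversarially-derived signal
    with torch.no_grad():
        for p in model.parameters():
            if p.grad is None:
                continue
            score = p.grad.abs()
            thresh = torch.quantile(score.flatten(), quantile)
            mask = score >= thresh               # parameters biased to forget set
            p[mask] = torch.randn_like(p)[mask] * 0.01   # re-initialize
            p.grad = None

model = torch.nn.Linear(8, 2)
loss = model(torch.randn(4, 8)).sum()            # stand-in for the crafted loss
attack_and_reset(model, loss)
```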
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2401.08998v1-abstract-full').style.display = 'none'; document.getElementById('2401.08998v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 17 January, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> January 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2312.11890">arXiv:2312.11890</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2312.11890">pdf</a>, <a href="https://arxiv.org/format/2312.11890">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Social and Information Networks">cs.SI</span> </div> </div> <p class="title is-5 mathjax"> Difficulty-Focused Contrastive Learning for Knowledge Tracing with a Large Language Model-Based Difficulty Prediction </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Lee%2C+U">Unggi Lee</a>, <a href="/search/cs?searchtype=author&amp;query=Yoon%2C+S">Sungjun Yoon</a>, <a href="/search/cs?searchtype=author&amp;query=Yun%2C+J+S">Joon Seo Yun</a>, <a href="/search/cs?searchtype=author&amp;query=Park%2C+K">Kyoungsoo Park</a>, <a href="/search/cs?searchtype=author&amp;query=Jung%2C+Y">YoungHoon Jung</a>, <a href="/search/cs?searchtype=author&amp;query=Stratton%2C+D">Damji Stratton</a>, <a href="/search/cs?searchtype=author&amp;query=Kim%2C+H">Hyeoncheol Kim</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2312.11890v1-abstract-short" style="display: inline;"> This paper presents novel techniques for enhancing the performance of knowledge tracing (KT) models by focusing on the crucial factor of question and concept difficulty level. Despite the acknowledged significance of difficulty, previous KT research has yet to exploit its potential for model optimization and has struggled to predict difficulty from unseen data. To address these problems, we propos&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2312.11890v1-abstract-full').style.display = 'inline'; document.getElementById('2312.11890v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2312.11890v1-abstract-full" style="display: none;"> This paper presents novel techniques for enhancing the performance of knowledge tracing (KT) models by focusing on the crucial factor of question and concept difficulty level. Despite the acknowledged significance of difficulty, previous KT research has yet to exploit its potential for model optimization and has struggled to predict difficulty from unseen data. To address these problems, we propose a difficulty-centered contrastive learning method for KT models and a Large Language Model (LLM)-based framework for difficulty prediction. These innovative methods seek to improve the performance of KT models and provide accurate difficulty estimates for unseen data. 
Our ablation study confirms the efficacy of these techniques through enhanced KT model performance. Nonetheless, the complex relationship between language and difficulty merits further investigation.
Submitted 19 December, 2023; originally announced December 2023.
Comments: 10 pages, 4 figures, 2 tables
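One plausible reading of "difficulty-centered contrastive learning" is an InfoNCE-style loss in which questions sharing a difficulty level act as positives; the sketch below implements that guess and should not be read as the paper's loss.

```python
# A guess at a difficulty-centered contrastive signal: standard InfoNCE with
# same-difficulty questions as positives (illustrative assumption).
import torch
import torch.nn.functional as F

def difficulty_contrastive(emb, difficulty, tau=0.1):
    """emb: (B, d) question embeddings; difficulty: (B,) integer levels."""
    z = F.normalize(emb, dim=1)
    sim = z @ z.t() / tau
    sim.fill_diagonal_(float("-inf"))            # exclude self-pairs
    pos = difficulty.unsqueeze(0) == difficulty.unsqueeze(1)
    pos.fill_diagonal_(False)
    log_p = F.log_softmax(sim, dim=1)
    # average log-likelihood of same-difficulty positives, per anchor
    loss = -(log_p * pos).sum(1) / pos.sum(1).clamp(min=1)
    return loss.mean()

loss = difficulty_contrastive(torch.randn(6, 16), torch.tensor([0, 0, 1, 1, 2, 2]))
```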
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2312.05611v1-abstract-full').style.display = 'none'; document.getElementById('2312.05611v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 9 December, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> December 2023. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2311.10309">arXiv:2311.10309</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2311.10309">pdf</a>, <a href="https://arxiv.org/format/2311.10309">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Robotics">cs.RO</span> </div> </div> <p class="title is-5 mathjax"> Imagination-Augmented Hierarchical Reinforcement Learning for Safe and Interactive Autonomous Driving in Urban Environments </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Lee%2C+S">Sang-Hyun Lee</a>, <a href="/search/cs?searchtype=author&amp;query=Jung%2C+Y">Yoonjae Jung</a>, <a href="/search/cs?searchtype=author&amp;query=Seo%2C+S">Seung-Woo Seo</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2311.10309v2-abstract-short" style="display: inline;"> Hierarchical reinforcement learning (HRL) incorporates temporal abstraction into reinforcement learning (RL) by explicitly taking advantage of hierarchical structure. Modern HRL typically designs a hierarchical agent composed of a high-level policy and low-level policies. The high-level policy selects which low-level policy to activate at a lower frequency and the activated low-level policy select&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2311.10309v2-abstract-full').style.display = 'inline'; document.getElementById('2311.10309v2-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2311.10309v2-abstract-full" style="display: none;"> Hierarchical reinforcement learning (HRL) incorporates temporal abstraction into reinforcement learning (RL) by explicitly taking advantage of hierarchical structure. Modern HRL typically designs a hierarchical agent composed of a high-level policy and low-level policies. The high-level policy selects which low-level policy to activate at a lower frequency and the activated low-level policy selects an action at each time step. Recent HRL algorithms have achieved performance gains over standard RL algorithms in synthetic navigation tasks. However, we cannot apply these HRL algorithms to real-world navigation tasks. One of the main challenges is that real-world navigation tasks require an agent to perform safe and interactive behaviors in dynamic environments. In this paper, we propose imagination-augmented HRL (IAHRL) that efficiently integrates imagination into HRL to enable an agent to learn safe and interactive behaviors in real-world navigation tasks. Imagination is to predict the consequences of actions without interactions with actual environments. 
The key idea behind IAHRL is that the low-level policies imagine safe and structured behaviors, and then the high-level policy infers interactions with surrounding objects by interpreting the imagined behaviors. We also introduce a new attention mechanism that allows our high-level policy to be permutation-invariant to the order of surrounding objects and to prioritize our agent over them. To evaluate IAHRL, we introduce five complex urban driving tasks, which are among the most challenging real-world navigation tasks. The experimental results indicate that IAHRL enables an agent to perform safe and interactive behaviors, achieving higher success rates and lower average episode steps than baselines.
Submitted 23 January, 2024; v1 submitted 16 November, 2023; originally announced November 2023.
Comments: 15 pages, 9 figures; corrected typos, added references, revised experiments (results unchanged)
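To make the permutation-invariance claim concrete, here is a hypothetical sketch of the high-level decision step: imagined behaviors of surrounding objects are pooled with multi-head attention, and using the agent's own imagined behavior as the single query both makes the result order-invariant over the objects and prioritizes the agent. All names and shapes below are illustrative assumptions, not the authors' code.

```python
import torch
import torch.nn as nn

class HighLevelPolicy(nn.Module):
    """Hypothetical permutation-invariant high-level policy head."""
    def __init__(self, dim: int, n_options: int):
        super().__init__()
        self.attn = nn.MultiheadAttention(dim, num_heads=4, batch_first=True)
        self.score = nn.Linear(dim, n_options)

    def forward(self, ego: torch.Tensor, others: torch.Tensor) -> torch.Tensor:
        # ego: (1, 1, d) embedding of the agent's imagined behavior
        # others: (1, m, d) embeddings of surrounding objects' imagined behaviors
        # Attention pooling over `others` is invariant to their ordering, and the
        # ego embedding as the sole query keeps the agent's plan central.
        ctx, _ = self.attn(query=ego, key=others, value=others)
        return self.score(ctx.squeeze(1))   # logits over low-level policies
```

In a loop like the one the abstract describes, the selected low-level policy would then emit actions at every step until the high level is queried again.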
arXiv:2310.18119 [pdf, other] cs.CL cs.AI
Towards a Unified Conversational Recommendation System: Multi-task Learning via Contextualized Knowledge Distillation
Authors: Yeongseo Jung, Eunseo Jung, Lei Chen
Abstract: In a Conversational Recommendation System (CRS), an agent is asked to recommend a set of items to users within natural language conversations. To address the need for both conversational capability and personalized recommendations, prior works have utilized separate recommendation and dialogue modules. However, such an approach inevitably results in a discrepancy between recommendation results and generated responses. To bridge the gap, we propose multi-task learning for a unified CRS, where a single model jointly learns both tasks via Contextualized Knowledge Distillation (ConKD). We introduce two versions of ConKD: hard gate and soft gate. The former selectively gates between two task-specific teachers, while the latter integrates knowledge from both teachers. Our gates are computed on-the-fly in a context-specific manner, facilitating flexible integration of relevant knowledge. Extensive experiments demonstrate that our single model significantly improves recommendation performance while enhancing fluency, and achieves comparable results in terms of diversity.
Submitted 27 October, 2023; originally announced October 2023.
Comments: EMNLP 2023 Main Conference
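The hard/soft gate distinction maps naturally onto a gated distillation loss. The sketch below is a plausible reading of the abstract, not the paper's implementation: a context-dependent gate blends (soft) or selects between (hard) KL terms against a recommendation teacher and a dialogue teacher. The gate computation and loss weighting are assumptions.

```python
import torch
import torch.nn.functional as F

def conkd_loss(student_logits, rec_logits, dial_logits, gate, hard: bool):
    # gate: (batch, 1) in [0, 1], computed on-the-fly from the dialogue context
    log_p = F.log_softmax(student_logits, dim=-1)
    kl_rec = F.kl_div(log_p, F.softmax(rec_logits, dim=-1),
                      reduction="none").sum(-1, keepdim=True)
    kl_dial = F.kl_div(log_p, F.softmax(dial_logits, dim=-1),
                       reduction="none").sum(-1, keepdim=True)
    if hard:
        gate = (gate > 0.5).float()     # hard gate: commit to one teacher
    return (gate * kl_rec + (1.0 - gate) * kl_dial).mean()  # soft gate: blend
```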
</p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">EMNLP 2023 Main Conference</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2310.05538">arXiv:2310.05538</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2310.05538">pdf</a>, <a href="https://arxiv.org/format/2310.05538">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Image and Video Processing">eess.IV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> M3FPolypSegNet: Segmentation Network with Multi-frequency Feature Fusion for Polyp Localization in Colonoscopy Images </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Nam%2C+J">Ju-Hyeon Nam</a>, <a href="/search/cs?searchtype=author&amp;query=Park%2C+S">Seo-Hyeong Park</a>, <a href="/search/cs?searchtype=author&amp;query=Syazwany%2C+N+S">Nur Suriza Syazwany</a>, <a href="/search/cs?searchtype=author&amp;query=Jung%2C+Y">Yerim Jung</a>, <a href="/search/cs?searchtype=author&amp;query=Im%2C+Y">Yu-Han Im</a>, <a href="/search/cs?searchtype=author&amp;query=Lee%2C+S">Sang-Chul Lee</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2310.05538v2-abstract-short" style="display: inline;"> Polyp segmentation is crucial for preventing colorectal cancer a common type of cancer. Deep learning has been used to segment polyps automatically, which reduces the risk of misdiagnosis. Localizing small polyps in colonoscopy images is challenging because of its complex characteristics, such as color, occlusion, and various shapes of polyps. To address this challenge, a novel frequency-based ful&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2310.05538v2-abstract-full').style.display = 'inline'; document.getElementById('2310.05538v2-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2310.05538v2-abstract-full" style="display: none;"> Polyp segmentation is crucial for preventing colorectal cancer a common type of cancer. Deep learning has been used to segment polyps automatically, which reduces the risk of misdiagnosis. Localizing small polyps in colonoscopy images is challenging because of its complex characteristics, such as color, occlusion, and various shapes of polyps. To address this challenge, a novel frequency-based fully convolutional neural network, Multi-Frequency Feature Fusion Polyp Segmentation Network (M3FPolypSegNet) was proposed to decompose the input image into low/high/full-frequency components to use the characteristics of each component. We used three independent multi-frequency encoders to map multiple input images into a high-dimensional feature space. In the Frequency-ASPP Scalable Attention Module (F-ASPP SAM), ASPP was applied between each frequency component to preserve scale information. 
Subsequently, scalable attention was applied to emphasize polyp regions in the high-dimensional feature space. Finally, we designed three multi-task learning objectives (i.e., region, edge, and distance) in four decoder blocks to learn the structural characteristics of the region. The proposed model outperformed various segmentation models, with average performance gains of 6.92% and 7.52% across all metrics on CVC-ClinicDB and BKAI-IGH-NeoPolyp, respectively.
Submitted 9 October, 2023; v1 submitted 9 October, 2023; originally announced October 2023.
Comments: 5 pages. 2023 IEEE International Conference on Image Processing (ICIP). IEEE, 2023
MSC Class: 92C55
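The abstract does not say how the frequency decomposition is computed; one simple, commonly used choice is a Gaussian low-pass split, sketched below. The kernel size and sigma are assumptions for illustration.

```python
import torch
from torchvision.transforms.functional import gaussian_blur

def decompose(image: torch.Tensor):
    # image: (B, C, H, W) in [0, 1]
    low = gaussian_blur(image, kernel_size=[9, 9], sigma=[2.0, 2.0])  # low-pass
    high = image - low            # residual keeps edges and fine texture
    return low, high, image       # the "full" component is the original image

# Each component would then feed its own encoder, e.g.
# f_low, f_high, f_full = enc_low(low), enc_high(high), enc_full(image)
```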
arXiv:2309.14888 [pdf, other] cs.CV
Nearest Neighbor Guidance for Out-of-Distribution Detection
Authors: Jaewoo Park, Yoon Gyo Jung, Andrew Beng Jin Teoh
Abstract: Detecting out-of-distribution (OOD) samples is crucial for machine learning models deployed in open-world environments. Classifier-based scores are a standard approach for OOD detection due to their fine-grained detection capability. However, these scores often suffer from overconfidence issues, misclassifying OOD samples distant from the in-distribution region. To address this challenge, we propose a method called Nearest Neighbor Guidance (NNGuide) that guides the classifier-based score to respect the boundary geometry of the data manifold. NNGuide reduces the overconfidence of OOD samples while preserving the fine-grained capability of the classifier-based score. We conduct extensive experiments on ImageNet OOD detection benchmarks under diverse settings, including a scenario where the ID data undergoes natural distribution shift. Our results demonstrate that NNGuide provides a significant performance improvement on the base detection scores, achieving state-of-the-art results on the AUROC, FPR95, and AUPR metrics. The code is available at https://github.com/roomo7time/nnguide.
Submitted 26 September, 2023; originally announced September 2023.
Comments: Accepted to ICCV2023
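One plausible reading of "guiding" a classifier-based score with nearest neighbors is to scale it by similarity to a bank of in-distribution features, so that samples far from the data manifold are pushed toward "OOD" even when the classifier is confident. The combination rule below is an assumption; the linked repository has the authors' actual formulation.

```python
import numpy as np

def nnguide_score(feat, base_score, bank_feats, k=10):
    # feat: (d,) L2-normalized test feature; base_score: classifier-based score
    # bank_feats: (N, d) L2-normalized in-distribution feature bank
    sims = bank_feats @ feat              # cosine similarities to the ID bank
    guide = np.sort(sims)[-k:].mean()     # mean similarity to the k nearest IDs
    return base_score * guide             # small when far from the data manifold
```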
arXiv:2309.00237 [pdf, other] cs.CL cs.AI
Publicly Shareable Clinical Large Language Model Built on Synthetic Clinical Notes
Authors: Sunjun Kweon, Junu Kim, Jiyoun Kim, Sujeong Im, Eunbyeol Cho, Seongsu Bae, Jungwoo Oh, Gyubok Lee, Jong Hak Moon, Seng Chan You, Seungjin Baek, Chang Hoon Han, Yoon Bin Jung, Yohan Jo, Edward Choi
Abstract: The development of large language models tailored for handling patients' clinical notes is often hindered by the limited accessibility and usability of these notes due to strict privacy regulations. To address these challenges, we first create synthetic large-scale clinical notes using publicly available case reports extracted from biomedical literature. We then use these synthetic notes to train our specialized clinical large language model, Asclepius. While Asclepius is trained on synthetic data, we assess its potential performance in real-world applications by evaluating it on real clinical notes. We benchmark Asclepius against several other large language models, including GPT-3.5-turbo and other open-source alternatives. To further validate our approach using synthetic notes, we also compare Asclepius with its variants trained on real clinical notes. Our findings convincingly demonstrate that synthetic clinical notes can serve as viable substitutes for real ones when constructing high-performing clinical language models. This conclusion is supported by detailed evaluations conducted by both GPT-4 and medical professionals. All resources, including weights, code, and data used in the development of Asclepius, are made publicly accessible for future research (https://github.com/starmpcc/Asclepius).
Submitted 29 July, 2024; v1 submitted 1 September, 2023; originally announced September 2023.
Comments: ACL 2024 (Findings)
</p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">ACL 2024 (Findings)</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2308.16529">arXiv:2308.16529</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2308.16529">pdf</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Robotics">cs.RO</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Human-Computer Interaction">cs.HC</span> </div> </div> <p class="title is-5 mathjax"> Developing Social Robots with Empathetic Non-Verbal Cues Using Large Language Models </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Lee%2C+Y+K">Yoon Kyung Lee</a>, <a href="/search/cs?searchtype=author&amp;query=Jung%2C+Y">Yoonwon Jung</a>, <a href="/search/cs?searchtype=author&amp;query=Kang%2C+G">Gyuyi Kang</a>, <a href="/search/cs?searchtype=author&amp;query=Hahn%2C+S">Sowon Hahn</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2308.16529v1-abstract-short" style="display: inline;"> We propose augmenting the empathetic capacities of social robots by integrating non-verbal cues. Our primary contribution is the design and labeling of four types of empathetic non-verbal cues, abbreviated as SAFE: Speech, Action (gesture), Facial expression, and Emotion, in a social robot. These cues are generated using a Large Language Model (LLM). We developed an LLM-based conversational system&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2308.16529v1-abstract-full').style.display = 'inline'; document.getElementById('2308.16529v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2308.16529v1-abstract-full" style="display: none;"> We propose augmenting the empathetic capacities of social robots by integrating non-verbal cues. Our primary contribution is the design and labeling of four types of empathetic non-verbal cues, abbreviated as SAFE: Speech, Action (gesture), Facial expression, and Emotion, in a social robot. These cues are generated using a Large Language Model (LLM). We developed an LLM-based conversational system for the robot and assessed its alignment with social cues as defined by human counselors. Preliminary results show distinct patterns in the robot&#39;s responses, such as a preference for calm and positive social emotions like &#39;joy&#39; and &#39;lively&#39;, and frequent nodding gestures. Despite these tendencies, our approach has led to the development of a social robot capable of context-aware and more authentic interactions. Our work lays the groundwork for future studies on human-robot interactions, emphasizing the essential role of both verbal and non-verbal cues in creating social and empathetic robots. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2308.16529v1-abstract-full').style.display = 'none'; document.getElementById('2308.16529v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 31 August, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> August 2023. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Journal ref:</span> In Proceedings of 2023 IEEE International Conference on Robot &amp; Human Interactive Communication (RO-MAN) </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2307.16205">arXiv:2307.16205</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2307.16205">pdf</a>, <a href="https://arxiv.org/format/2307.16205">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Graphics">cs.GR</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> <div class="is-inline-block" style="margin-left: 0.5rem"> <div class="tags has-addons"> <span class="tag is-dark is-size-7">doi</span> <span class="tag is-light is-size-7"><a class="" href="https://doi.org/10.1145/3588432.3591498">10.1145/3588432.3591498 <i class="fa fa-external-link" aria-hidden="true"></i></a></span> </div> </div> </div> <p class="title is-5 mathjax"> Mesh Density Adaptation for Template-based Shape Reconstruction </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Jung%2C+Y">Yucheol Jung</a>, <a href="/search/cs?searchtype=author&amp;query=Kim%2C+H">Hyomin Kim</a>, <a href="/search/cs?searchtype=author&amp;query=Hwang%2C+G">Gyeongha Hwang</a>, <a href="/search/cs?searchtype=author&amp;query=Baek%2C+S">Seung-Hwan Baek</a>, <a href="/search/cs?searchtype=author&amp;query=Lee%2C+S">Seungyong Lee</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2307.16205v1-abstract-short" style="display: inline;"> In 3D shape reconstruction based on template mesh deformation, a regularization, such as smoothness energy, is employed to guide the reconstruction into a desirable direction. In this paper, we highlight an often overlooked property in the regularization: the vertex density in the mesh. Without careful control on the density, the reconstruction may suffer from under-sampling of vertices near shape&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2307.16205v1-abstract-full').style.display = 'inline'; document.getElementById('2307.16205v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2307.16205v1-abstract-full" style="display: none;"> In 3D shape reconstruction based on template mesh deformation, a regularization, such as smoothness energy, is employed to guide the reconstruction into a desirable direction. In this paper, we highlight an often overlooked property in the regularization: the vertex density in the mesh. Without careful control on the density, the reconstruction may suffer from under-sampling of vertices near shape details. 
We propose a novel mesh density adaptation method to resolve the under-sampling problem. Our mesh density adaptation energy increases the density of vertices near complex structures via deformation to help reconstruct shape details. We demonstrate the usability and performance of mesh density adaptation on two tasks, inverse rendering and non-rigid surface registration. Our method produces more accurate reconstruction results compared to the cases without mesh density adaptation.
Submitted 30 July, 2023; originally announced July 2023.
Comments: To appear at SIGGRAPH 2023. Jung and Kim share equal contribution. For code, see https://github.com/ycjungSubhuman/density-adaptation/
ACM Class: I.4.5; I.3.5
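The paper's actual energy is not given here, so the following is only a toy illustration of the quantity being controlled: one simple proxy for local vertex density is the inverse mean length of a vertex's incident edges, and a density adaptation term could penalize low density wherever a per-vertex detail weight (e.g., a curvature estimate) is high. Every formula below is an assumption.

```python
import torch

def vertex_density(verts: torch.Tensor, edges: torch.Tensor) -> torch.Tensor:
    # verts: (V, 3) positions; edges: (E, 2) vertex-index pairs
    lengths = (verts[edges[:, 0]] - verts[edges[:, 1]]).norm(dim=1)   # (E,)
    idx = edges.t().reshape(-1)                   # both endpoints, (2E,)
    ones = torch.ones_like(idx, dtype=verts.dtype)
    deg = torch.zeros(len(verts)).index_add_(0, idx, ones)
    tot = torch.zeros(len(verts)).index_add_(0, idx, lengths.repeat(2))
    return deg / tot.clamp(min=1e-8)   # high where incident edges are short

def density_energy(density: torch.Tensor, detail: torch.Tensor) -> torch.Tensor:
    # detail: per-vertex detail weight (assumed, e.g. curvature magnitude)
    return (detail / density.clamp(min=1e-8)).mean()  # favors density at detail
```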
arXiv:2307.05916 [pdf, other] cs.CV
SwiFT: Swin 4D fMRI Transformer
Authors: Peter Yongho Kim, Junbeom Kwon, Sunghwan Joo, Sangyoon Bae, Donggyu Lee, Yoonho Jung, Shinjae Yoo, Jiook Cha, Taesup Moon
Abstract: Modeling spatiotemporal brain dynamics from high-dimensional data, such as functional Magnetic Resonance Imaging (fMRI), is a formidable task in neuroscience. Existing approaches for fMRI analysis utilize hand-crafted features, but the process of feature extraction risks losing essential information in fMRI scans. To address this challenge, we present SwiFT (Swin 4D fMRI Transformer), a Swin Transformer architecture that can learn brain dynamics directly from fMRI volumes in a memory- and computation-efficient manner. SwiFT achieves this by implementing a 4D window multi-head self-attention mechanism and absolute positional embeddings. We evaluate SwiFT using multiple large-scale resting-state fMRI datasets, including the Human Connectome Project (HCP), Adolescent Brain Cognitive Development (ABCD), and UK Biobank (UKB) datasets, to predict sex, age, and cognitive intelligence. Our experimental outcomes reveal that SwiFT consistently outperforms recent state-of-the-art models. Furthermore, by leveraging its end-to-end learning capability, we show that contrastive loss-based self-supervised pre-training of SwiFT can enhance performance on downstream tasks. Additionally, we employ an explainable AI method to identify the brain regions associated with sex classification. To our knowledge, SwiFT is the first Swin Transformer architecture to process 4D spatiotemporal brain functional data in an end-to-end fashion. Our work holds substantial potential for facilitating scalable learning of functional brain imaging in neuroscience research by reducing the hurdles associated with applying Transformer models to high-dimensional fMRI.
Submitted 31 October, 2023; v1 submitted 12 July, 2023; originally announced July 2023.
Comments: NeurIPS 2023
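The memory savings of 4D window attention come from restricting self-attention to small local (time x space) windows rather than the full token grid. The partitioning sketch below is a hypothetical reading of that idea; window sizes and layout are assumptions, and the cyclic shifting of the Swin design is omitted.

```python
import torch
import torch.nn as nn

def window_attention_4d(x: torch.Tensor, attn: nn.MultiheadAttention,
                        w=(4, 4, 4, 4)) -> torch.Tensor:
    # x: (B, T, X, Y, Z, C) tokens over time and 3D space; w: 4D window size.
    # `attn` is assumed built with batch_first=True, e.g.
    # nn.MultiheadAttention(embed_dim=C, num_heads=8, batch_first=True).
    B, T, X, Y, Z, C = x.shape
    wt, wx, wy, wz = w
    x = x.view(B, T // wt, wt, X // wx, wx, Y // wy, wy, Z // wz, wz, C)
    x = x.permute(0, 1, 3, 5, 7, 2, 4, 6, 8, 9)   # group window contents last
    x = x.reshape(-1, wt * wx * wy * wz, C)       # one row per 4D window
    out, _ = attn(x, x, x)                        # attention within windows only
    return out                                    # reshape back + shift omitted
```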
</p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">NeurIPS 2023</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2307.01350">arXiv:2307.01350</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2307.01350">pdf</a>, <a href="https://arxiv.org/format/2307.01350">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Robotics">cs.RO</span> </div> </div> <p class="title is-5 mathjax"> Dynamic Mobile Manipulation via Whole-Body Bilateral Teleoperation of a Wheeled Humanoid </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Purushottam%2C+A">Amartya Purushottam</a>, <a href="/search/cs?searchtype=author&amp;query=Jung%2C+Y">Yeongtae Jung</a>, <a href="/search/cs?searchtype=author&amp;query=Xu%2C+C">Christopher Xu</a>, <a href="/search/cs?searchtype=author&amp;query=Ramos%2C+J">Joao Ramos</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2307.01350v1-abstract-short" style="display: inline;"> Humanoid robots have the potential to help human workers by realizing physically demanding manipulation tasks such as moving large boxes within warehouses. We define such tasks as Dynamic Mobile Manipulation (DMM). This paper presents a framework for DMM via whole-body teleoperation, built upon three key contributions: Firstly, a teleoperation framework employing a Human Machine Interface (HMI) an&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2307.01350v1-abstract-full').style.display = 'inline'; document.getElementById('2307.01350v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2307.01350v1-abstract-full" style="display: none;"> Humanoid robots have the potential to help human workers by realizing physically demanding manipulation tasks such as moving large boxes within warehouses. We define such tasks as Dynamic Mobile Manipulation (DMM). This paper presents a framework for DMM via whole-body teleoperation, built upon three key contributions: Firstly, a teleoperation framework employing a Human Machine Interface (HMI) and a bi-wheeled humanoid, SATYRR, is proposed. Secondly, the study introduces a dynamic locomotion mapping, utilizing human-robot reduced order models, and a kinematic retargeting strategy for manipulation tasks. Additionally, the paper discusses the role of whole-body haptic feedback for wheeled humanoid control. Finally, the system&#39;s effectiveness and mappings for DMM are validated through locomanipulation experiments and heavy box pushing tasks. Here we show two forms of DMM: grasping a target moving at an average speed of 0.4 m/s, and pushing boxes weighing up to 105\% of the robot&#39;s weight. By simultaneously adjusting their pitch and using their arms, the pilot adjusts the robot pose to apply larger contact forces and move a heavy box at a constant velocity of 0.2 m/s. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2307.01350v1-abstract-full').style.display = 'none'; document.getElementById('2307.01350v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 3 July, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> July 2023. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2306.08126">arXiv:2306.08126</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2306.08126">pdf</a>, <a href="https://arxiv.org/format/2306.08126">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> </div> </div> <p class="title is-5 mathjax"> PersonaPKT: Building Personalized Dialogue Agents via Parameter-efficient Knowledge Transfer </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Han%2C+X">Xu Han</a>, <a href="/search/cs?searchtype=author&amp;query=Guo%2C+B">Bin Guo</a>, <a href="/search/cs?searchtype=author&amp;query=Jung%2C+Y">Yoon Jung</a>, <a href="/search/cs?searchtype=author&amp;query=Yao%2C+B">Benjamin Yao</a>, <a href="/search/cs?searchtype=author&amp;query=Zhang%2C+Y">Yu Zhang</a>, <a href="/search/cs?searchtype=author&amp;query=Liu%2C+X">Xiaohu Liu</a>, <a href="/search/cs?searchtype=author&amp;query=Guo%2C+C">Chenlei Guo</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2306.08126v1-abstract-short" style="display: inline;"> Personalized dialogue agents (DAs) powered by large pre-trained language models (PLMs) often rely on explicit persona descriptions to maintain personality consistency. However, such descriptions may not always be available or may pose privacy concerns. To tackle this bottleneck, we introduce PersonaPKT, a lightweight transfer learning approach that can build persona-consistent dialogue models with&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2306.08126v1-abstract-full').style.display = 'inline'; document.getElementById('2306.08126v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2306.08126v1-abstract-full" style="display: none;"> Personalized dialogue agents (DAs) powered by large pre-trained language models (PLMs) often rely on explicit persona descriptions to maintain personality consistency. However, such descriptions may not always be available or may pose privacy concerns. To tackle this bottleneck, we introduce PersonaPKT, a lightweight transfer learning approach that can build persona-consistent dialogue models without explicit persona descriptions. By representing each persona as a continuous vector, PersonaPKT learns implicit persona-specific features directly from a small number of dialogue samples produced by the same persona, adding less than 0.1% trainable parameters for each persona on top of the PLM backbone. 
Empirical results demonstrate that PersonaPKT effectively builds personalized DAs with high storage efficiency, outperforming various baselines in terms of persona consistency while maintaining good response generation quality. In addition, it enhances privacy protection by avoiding explicit persona descriptions. Overall, PersonaPKT is an effective solution for creating personalized DAs that respect user privacy.
Submitted 13 June, 2023; originally announced June 2023.
Comments: 10 pages, 3 figures, accepted to SustaiNLP 2023
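"Each persona as a continuous vector" added on top of a frozen backbone is the prefix-tuning pattern; a minimal sketch of that pattern is below. The token count, dimension, and names are assumptions for illustration, not the paper's configuration.

```python
import torch
import torch.nn as nn

class PersonaPrefix(nn.Module):
    """Hypothetical per-persona prefix over a frozen PLM's input embeddings."""
    def __init__(self, n_tokens: int = 8, dim: int = 768):
        super().__init__()
        # roughly n_tokens * dim trainable parameters per persona,
        # a tiny fraction of a PLM's parameter count
        self.prefix = nn.Parameter(torch.randn(n_tokens, dim) * 0.02)

    def forward(self, token_embeds: torch.Tensor) -> torch.Tensor:
        # token_embeds: (B, L, dim) embeddings from the frozen backbone
        B = token_embeds.size(0)
        p = self.prefix.unsqueeze(0).expand(B, -1, -1)
        return torch.cat([p, token_embeds], dim=1)  # persona-conditioned input
```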
arXiv:2306.02694 [pdf, other] cs.RO cs.HC doi:10.1109/RO-MAN57019.2023.10309617
Social Robots As Companions for Lonely Hearts: The Role of Anthropomorphism and Robot Appearance
Authors: Yoonwon Jung, Sowon Hahn
Abstract: Loneliness is a distressing personal experience and a growing social issue. Social robots could alleviate the pain of loneliness, particularly for those who lack in-person interaction. This paper investigated how the effect of loneliness on the anthropomorphism of social robots differs by robot appearance, and how it influences purchase intention. Participants viewed a video of one of three robots (machine-like, animal-like, or human-like) moving and interacting with a human counterpart. Bootstrapped multiple regression results revealed that although the unique effect of animal-likeness on anthropomorphism was higher than that of human-likeness, lonely individuals' tendency to anthropomorphize the animal-like robot was lower than their tendency for the human-like robot. This moderating effect remained significant after covariates were included. Bootstrapped mediation analysis showed that anthropomorphism had both a positive direct effect on purchase intent and a positive indirect effect mediated by likability. Our results suggest that lonely individuals' tendency to anthropomorphize social robots should not be summarized into one unified inclination. Moreover, by extending the effect of loneliness on anthropomorphism to likability and purchase intent, this study explores the potential of social robots to be adopted as companions for lonely individuals in real life. Lastly, we discuss the practical implications of the current study for designing social robots.
Submitted 4 July, 2023; v1 submitted 5 June, 2023; originally announced June 2023.
Comments: Accepted for oral presentation at the 32nd IEEE International Conference on Robot and Human Interactive Communication (RO-MAN 2023). Camera-ready (ver2)
Journal ref: 2023 32nd IEEE International Conference on Robot and Human Interactive Communication (RO-MAN), Busan, Korea, Republic of, 2023, pp. 2520-2525
arXiv:2305.00278 [pdf, other] cs.CV cs.AI cs.LG
Segment Anything Model (SAM) Meets Glass: Mirror and Transparent Objects Cannot Be Easily Detected
Authors: Dongsheng Han, Chaoning Zhang, Yu Qiao, Maryam Qamar, Yuna Jung, SeungKyu Lee, Sung-Ho Bae, Choong Seon Hong
Abstract: Meta AI Research has recently released SAM (Segment Anything Model), which is trained on a large segmentation dataset of over 1 billion masks. As a foundation model in the field of computer vision, SAM has gained attention for its impressive performance in generic object segmentation. Despite its strong capability in a wide range of zero-shot transfer tasks, it remains unknown whether SAM can detect objects in challenging setups like transparent objects. In this work, we perform an empirical evaluation of two glass-related challenging scenarios: mirrors and transparent objects. We found that SAM often fails to detect the glass in both scenarios, which raises concerns about deploying SAM in safety-critical situations that involve various forms of glass.
Submitted 29 April, 2023; originally announced May 2023.
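Reproducing this kind of probe is straightforward with the public segment-anything package; a minimal sketch follows. The checkpoint path, image file, and click coordinates are placeholders, and whether the returned mask actually covers the glass region is precisely what the paper evaluates.

```python
import cv2
import numpy as np
from segment_anything import SamPredictor, sam_model_registry

# load a SAM backbone from a downloaded checkpoint (placeholder path)
sam = sam_model_registry["vit_h"](checkpoint="sam_vit_h_4b8939.pth")
predictor = SamPredictor(sam)

image = cv2.cvtColor(cv2.imread("mirror_scene.jpg"), cv2.COLOR_BGR2RGB)
predictor.set_image(image)
masks, scores, _ = predictor.predict(
    point_coords=np.array([[512, 384]]),  # a click placed on the glass region
    point_labels=np.array([1]),           # 1 = foreground prompt
)
```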
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2305.00278v1-abstract-full').style.display = 'none'; document.getElementById('2305.00278v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 29 April, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> May 2023. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2304.01565">arXiv:2304.01565</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2304.01565">pdf</a>, <a href="https://arxiv.org/format/2304.01565">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> <div class="is-inline-block" style="margin-left: 0.5rem"> <div class="tags has-addons"> <span class="tag is-dark is-size-7">doi</span> <span class="tag is-light is-size-7"><a class="" href="https://doi.org/10.13140/RG.2.2.26493.64480">10.13140/RG.2.2.26493.64480 <i class="fa fa-external-link" aria-hidden="true"></i></a></span> </div> </div> </div> <p class="title is-5 mathjax"> A Survey on Graph Diffusion Models: Generative AI in Science for Molecule, Protein and Material </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Zhang%2C+M">Mengchun Zhang</a>, <a href="/search/cs?searchtype=author&amp;query=Qamar%2C+M">Maryam Qamar</a>, <a href="/search/cs?searchtype=author&amp;query=Kang%2C+T">Taegoo Kang</a>, <a href="/search/cs?searchtype=author&amp;query=Jung%2C+Y">Yuna Jung</a>, <a href="/search/cs?searchtype=author&amp;query=Zhang%2C+C">Chenshuang Zhang</a>, <a href="/search/cs?searchtype=author&amp;query=Bae%2C+S">Sung-Ho Bae</a>, <a href="/search/cs?searchtype=author&amp;query=Zhang%2C+C">Chaoning Zhang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2304.01565v1-abstract-short" style="display: inline;"> Diffusion models have become a new SOTA generative modeling method in various fields, for which there are multiple survey works that provide an overall survey. With the number of articles on diffusion models increasing exponentially in the past few years, there is an increasing need for surveys of diffusion models on specific fields. In this work, we are committed to conducting a survey on the gra&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2304.01565v1-abstract-full').style.display = 'inline'; document.getElementById('2304.01565v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2304.01565v1-abstract-full" style="display: none;"> Diffusion models have become a new SOTA generative modeling method in various fields, for which there are multiple survey works that provide an overall survey. With the number of articles on diffusion models increasing exponentially in the past few years, there is an increasing need for surveys of diffusion models on specific fields. In this work, we are committed to conducting a survey on the graph diffusion models. 
Even though our focus is to cover the progress of diffusion models on graphs, we first briefly summarize how other generative modeling methods are used for graphs. After that, we introduce the mechanism of diffusion models in various forms, which facilitates the discussion of graph diffusion models. The applications of graph diffusion models mainly fall into the category of AI-generated content (AIGC) in science; we mainly focus on how graph diffusion models are utilized for generating molecules and proteins, but we also cover other cases, including materials design. Moreover, we discuss the issue of evaluating diffusion models in the graph domain and the existing challenges.
Submitted 4 April, 2023; originally announced April 2023.

arXiv:2303.15060 [pdf, other] cs.CV
TMO: Textured Mesh Acquisition of Objects with a Mobile Device by using Differentiable Rendering
Authors: Jaehoon Choi, Dongki Jung, Taejae Lee, Sangwook Kim, Youngdong Jung, Dinesh Manocha, Donghwan Lee
Abstract: We present a new pipeline for acquiring a textured mesh in the wild with a single smartphone, which offers access to images, depth maps, and valid poses.
Our method first introduces an RGBD-aided structure-from-motion stage, which yields filtered depth maps and refines camera poses guided by the corresponding depth. Then, we adopt a neural implicit surface reconstruction method, which allows for high-quality meshes, and develop a new training process that applies a regularization provided by classical multi-view stereo methods. Moreover, we apply differentiable rendering to fine-tune incomplete texture maps and generate textures that are perceptually closer to the original scene. Our pipeline can be applied to any common object in the real world without the need for either in-the-lab environments or accurate mask images. We demonstrate results on captured objects with complex shapes and validate our method numerically against existing 3D reconstruction and texture mapping methods.
Submitted 27 March, 2023; originally announced March 2023.
Comments: Accepted to CVPR23. Project Page: https://jh-choi.github.io/TMO/

arXiv:2303.11853 [pdf, other] cs.RO cs.AI
LoRCoN-LO: Long-term Recurrent Convolutional Network-based LiDAR Odometry
Authors: Donghwi Jung, Jae-Kyung Cho, Younghwa Jung, Soohyun Shin, Seong-Woo Kim
Abstract: We propose a deep learning-based LiDAR odometry estimation method called LoRCoN-LO that utilizes the long-term recurrent convolutional network (LRCN) structure. The LRCN layer is a structure that can process spatial and temporal information at once by using both CNN and LSTM layers.
arXiv:2303.11853  [pdf, other]  cs.RO  cs.AI
LoRCoN-LO: Long-term Recurrent Convolutional Network-based LiDAR Odometry
Authors: Donghwi Jung, Jae-Kyung Cho, Younghwa Jung, Soohyun Shin, Seong-Woo Kim
Abstract: We propose a deep learning-based LiDAR odometry estimation method called LoRCoN-LO that utilizes the long-term recurrent convolutional network (LRCN) structure. The LRCN layer processes spatial and temporal information at once by using both CNN and LSTM layers, which makes it suitable for predicting continuous robot movements from point clouds that contain spatial information. We therefore built a LoRCoN-LO model using the LRCN layer and predicted the pose of the robot with this model. For performance verification, we conducted experiments on a public dataset (KITTI). The results show that LoRCoN-LO produces accurate odometry predictions on this dataset. The code is available at https://github.com/donghwijung/LoRCoN-LO.
Submitted 21 March, 2023; originally announced March 2023.
Comments: 4 pages, ICEIC 2023
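To make the LRCN idea concrete, here is a minimal sketch in which a CNN encodes each per-frame LiDAR range image, an LSTM aggregates the sequence, and a linear head regresses a 6-DoF relative pose. The input format and all layer sizes are illustrative assumptions, not the configuration used by LoRCoN-LO.

```python
# Minimal sketch of an LRCN-style odometry block: CNN per frame,
# LSTM over time, linear pose head. Sizes are illustrative.
import torch
import torch.nn as nn

class LRCNOdometry(nn.Module):
    def __init__(self, in_ch=2, feat_dim=256, hidden=128):
        super().__init__()
        self.cnn = nn.Sequential(
            nn.Conv2d(in_ch, 32, 3, stride=2, padding=1), nn.ReLU(),
            nn.Conv2d(32, 64, 3, stride=2, padding=1), nn.ReLU(),
            nn.AdaptiveAvgPool2d(1), nn.Flatten(),
            nn.Linear(64, feat_dim), nn.ReLU(),
        )
        self.lstm = nn.LSTM(feat_dim, hidden, batch_first=True)
        self.head = nn.Linear(hidden, 6)  # translation (3) + rotation (3)

    def forward(self, x):                 # x: (B, T, C, H, W) range images
        b, t = x.shape[:2]
        f = self.cnn(x.flatten(0, 1)).view(b, t, -1)  # per-frame features
        h, _ = self.lstm(f)                           # temporal aggregation
        return self.head(h[:, -1])                    # pose at the last step

poses = LRCNOdometry()(torch.randn(4, 5, 2, 64, 900))  # -> (4, 6)
```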
arXiv:2301.10413  [pdf, other]  cs.CV
Local Feature Extraction from Salient Regions by Feature Map Transformation
Authors: Yerim Jung, Nur Suriza Syazwany Binti Ahmad Nizam, Sang-Chul Lee
Abstract: Local feature matching is essential for many applications, such as localization and 3D reconstruction. However, it is challenging to match feature points accurately under varying camera viewpoints and illumination conditions. In this paper, we propose a framework that robustly extracts and describes salient local features regardless of changing light and viewpoints. The framework suppresses illumination variations and encourages structural information, so as to ignore noise from lighting and focus on edges. We classify the elements of the feature covariance matrix, an implicit form of feature map information, into two components. Our model extracts feature points from salient regions, leading to fewer incorrect matches. In our experiments, the proposed method achieved higher accuracy than state-of-the-art methods on public datasets such as HPatches, Aachen Day-Night, and ETH, which exhibit especially high variation in viewpoint and illumination.
Submitted 25 January, 2023; originally announced January 2023.
Comments: British Machine Vision Conference (BMVC) 2022
arXiv:2211.15950  [pdf, other]  eess.IV  cs.CV
Enhanced artificial intelligence-based diagnosis using CBCT with internal denoising: Clinical validation for discrimination of fungal ball, sinusitis, and normal cases in the maxillary sinus
Authors: Kyungsu Kim, Chae Yeon Lim, Joong Bo Shin, Myung Jin Chung, Yong Gi Jung
Abstract: Cone-beam computed tomography (CBCT) provides 3D volumetric imaging of a target with low radiation dose and cost compared with conventional computed tomography, and it is widely used in the detection of paranasal sinus disease. However, it lacks the sensitivity to detect soft-tissue lesions owing to reconstruction constraints. Consequently, only physicians with expertise in CBCT reading can distinguish between inherent artifacts or noise and diseases, restricting the use of this imaging modality. The development of artificial intelligence (AI)-based computer-aided diagnosis methods for CBCT to overcome the shortage of experienced physicians has attracted substantial attention. However, advanced AI-based diagnosis addressing the intrinsic noise in CBCT has not been devised, discouraging the practical use of AI solutions for CBCT. To address this issue, we propose an AI-based computer-aided diagnosis method using CBCT with a denoising module. This module is applied before diagnosis to reconstruct the internal ground-truth full-dose scan corresponding to an input CBCT image and thereby improve diagnostic performance. The external validation results for the unified diagnosis of sinus fungal ball, chronic rhinosinusitis, and normal cases show that the proposed method improves the micro-average AUC, macro-average AUC, and accuracy by 7.4, 5.6, and 9.6 percentage points (from 86.2%, 87.0%, and 73.4% to 93.6%, 92.6%, and 83.0%), respectively, compared with a baseline, while improving human diagnosis accuracy by about 11 percentage points (from 71.7% to 83.0%), demonstrating technical differentiation and clinical effectiveness. This pioneering study on AI-based diagnosis using CBCT indicates that denoising can improve diagnostic performance and reader interpretability in images from the sinonasal area, thereby providing a new approach and direction for radiographic image reconstruction in the development of AI-based diagnostic solutions.
Submitted 29 November, 2022; originally announced November 2022.
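The denoise-then-diagnose composition can be illustrated compactly. In the sketch below, both networks are stand-in placeholders, not the paper's architectures: an image-to-image module maps a CBCT patch toward a pseudo full-dose scan before a small 3-way classifier sees it.

```python
# Minimal sketch of the denoise-then-diagnose idea on a CBCT patch.
# Both networks are placeholders for the paper's actual modules.
import torch
import torch.nn as nn

denoiser = nn.Sequential(              # image-to-image denoising module
    nn.Conv3d(1, 16, 3, padding=1), nn.ReLU(),
    nn.Conv3d(16, 1, 3, padding=1),
)
classifier = nn.Sequential(            # 3-way: fungal ball / sinusitis / normal
    nn.Conv3d(1, 8, 3, stride=2, padding=1), nn.ReLU(),
    nn.AdaptiveAvgPool3d(1), nn.Flatten(), nn.Linear(8, 3),
)

volume = torch.randn(2, 1, 32, 64, 64)     # (B, C, D, H, W) CBCT patch
logits = classifier(denoiser(volume))      # denoise first, then diagnose
print(logits.shape)                        # torch.Size([2, 3])
```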
arXiv:2210.16423  [pdf]  cs.RO  cs.HC
Transferability-based Chain Motion Mapping from Humans to Humanoids for Teleoperation
Authors: Matthew Stanley, Yunsik Jung, Michael Bowman, Lingfeng Tao, Xiaoli Zhang
Abstract: Although data-driven motion mapping methods are promising for intuitive robot control and teleoperation that generates human-like robot movement, they normally require tedious pair-wise training for each specific human-robot pair. This paper proposes a transferability-based mapping scheme that allows new robot and human input systems to leverage the mappings of existing trained pairs to form a mapping transfer chain, reducing the number of new pair-specific mappings that need to be generated. The first part of the mapping scheme is a Synergy Mapping via Dual-Autoencoder (SyDa) method, which uses the latent features of two autoencoders to extract the common synergy of the two agents. Second, a transferability metric is created that approximates how well the mapping between a pair of agents will perform compared to another pair, before the motion mapping models are created; it can thus guide the formation of an optimal mapping chain for the new human-robot pair. Experiments with human subjects and a Pepper robot demonstrated that 1) the SyDa method improves the accuracy and generalizability of the pair mappings, 2) the SyDa method allows for bidirectional mapping that does not prioritize one direction of motion mapping, and 3) the transferability metric measures how compatible two agents are for accurate teleoperation. The combination of the SyDa method and the transferability metric creates the generalizable and accurate mappings needed to build the transfer mapping chain.
Submitted 28 October, 2022; originally announced October 2022.
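A minimal sketch of the dual-autoencoder synergy mapping, assuming each agent's pose is a flat vector: motion is mapped by encoding with one agent's encoder and decoding with the other's decoder, which also yields the bidirectional property noted above. Joint training on paired motions would be needed to align the two latent spaces; all dimensions are illustrative.

```python
# Minimal sketch of a dual-autoencoder (SyDa-style) motion mapping.
# Dimensions are illustrative; training would align the latent spaces.
import torch
import torch.nn as nn

def autoencoder(dim, latent=8):
    enc = nn.Sequential(nn.Linear(dim, 64), nn.ReLU(), nn.Linear(64, latent))
    dec = nn.Sequential(nn.Linear(latent, 64), nn.ReLU(), nn.Linear(64, dim))
    return enc, dec

human_enc, human_dec = autoencoder(dim=21)   # e.g., 7 joints x 3 angles
robot_enc, robot_dec = autoencoder(dim=14)   # e.g., humanoid arm DoFs

human_pose = torch.randn(1, 21)
robot_pose = robot_dec(human_enc(human_pose))   # human -> robot
recovered  = human_dec(robot_enc(robot_pose))   # robot -> human (bidirectional)
```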
arXiv:2210.13533  [pdf, other]  cs.LG  cs.AI  stat.ML
Sufficient Invariant Learning for Distribution Shift
Authors: Taero Kim, Subeen Park, Sungjun Lim, Yonghan Jung, Krikamol Muandet, Kyungwoo Song
Abstract: Learning robust models under distribution shifts between training and test datasets is a fundamental challenge in machine learning. While learning invariant features across environments is a popular approach, it often assumes that these features are fully observed in both training and test sets, a condition frequently violated in practice. When models rely on invariant features absent from the test set, their robustness in new environments can deteriorate. To tackle this problem, we introduce a novel learning principle called the Sufficient Invariant Learning (SIL) framework, which focuses on learning a sufficient subset of invariant features rather than relying on a single feature. After demonstrating the limitations of existing invariant learning methods, we propose a new algorithm, Adaptive Sharpness-aware Group Distributionally Robust Optimization (ASGDRO), to learn diverse invariant features by seeking common flat minima across the environments. We theoretically demonstrate that finding a common flat minimum enables robust predictions based on diverse invariant features. Empirical evaluations on multiple datasets, including our new benchmark, confirm ASGDRO's robustness against distribution shifts, highlighting the limitations of existing methods.
Submitted 18 November, 2024; v1 submitted 24 October, 2022; originally announced October 2022.
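The combination of group-robust weighting and flat-minima seeking can be sketched with a group-DRO weight update plus a SAM-style perturbation, as below. This is a loose, simplified analogue under assumed hyperparameters, not the paper's ASGDRO algorithm.

```python
# Simplified sharpness-aware group-DRO step, assuming batches carry
# environment (group) labels. Not the paper's ASGDRO verbatim.
import torch
import torch.nn as nn

model = nn.Linear(10, 2)
opt = torch.optim.SGD(model.parameters(), lr=1e-2)
loss_fn = nn.CrossEntropyLoss()
n_groups, eta, rho = 3, 0.1, 0.05
q = torch.ones(n_groups) / n_groups           # group weights

def weighted_loss(x, y, g):
    losses = torch.stack([
        loss_fn(model(x[g == k]), y[g == k]) if (g == k).any()
        else torch.tensor(0.0)
        for k in range(n_groups)
    ])
    return losses, (q * losses).sum()

x = torch.randn(32, 10)
y = torch.randint(0, 2, (32,))
g = torch.randint(0, n_groups, (32,))

losses, loss = weighted_loss(x, y, g)
q = q * torch.exp(eta * losses.detach()); q = q / q.sum()  # up-weight worst groups

loss.backward()                               # SAM step 1: ascend to w + e(w)
with torch.no_grad():
    grad_norm = torch.cat([p.grad.flatten() for p in model.parameters()]).norm()
    eps = [rho * p.grad / (grad_norm + 1e-12) for p in model.parameters()]
    for p, e in zip(model.parameters(), eps): p.add_(e)
opt.zero_grad()
_, loss2 = weighted_loss(x, y, g)             # SAM step 2: grad at perturbed w
loss2.backward()
with torch.no_grad():
    for p, e in zip(model.parameters(), eps): p.sub_(e)
opt.step()                                    # descend with sharpness-aware grad
```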
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2210.13533v3-abstract-full').style.display = 'none'; document.getElementById('2210.13533v3-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 18 November, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 24 October, 2022; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2022. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2210.12363">arXiv:2210.12363</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2210.12363">pdf</a>, <a href="https://arxiv.org/format/2210.12363">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">stat.ML</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Methodology">stat.ME</span> </div> </div> <p class="title is-5 mathjax"> Bayesian Convolutional Deep Sets with Task-Dependent Stationary Prior </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Jung%2C+Y">Yohan Jung</a>, <a href="/search/cs?searchtype=author&amp;query=Park%2C+J">Jinkyoo Park</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2210.12363v1-abstract-short" style="display: inline;"> Convolutional deep sets are the architecture of a deep neural network (DNN) that can model stationary stochastic process. This architecture uses the kernel smoother and the DNN to construct the translation equivariant functional representations, and thus reflects the inductive bias of the stationarity into DNN. However, since this architecture employs the kernel smoother known as the non-parametri&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2210.12363v1-abstract-full').style.display = 'inline'; document.getElementById('2210.12363v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2210.12363v1-abstract-full" style="display: none;"> Convolutional deep sets are the architecture of a deep neural network (DNN) that can model stationary stochastic process. This architecture uses the kernel smoother and the DNN to construct the translation equivariant functional representations, and thus reflects the inductive bias of the stationarity into DNN. However, since this architecture employs the kernel smoother known as the non-parametric model, it may produce ambiguous representations when the number of data points is not given sufficiently. To remedy this issue, we introduce Bayesian convolutional deep sets that construct the random translation equivariant functional representations with stationary prior. Furthermore, we present how to impose the task-dependent prior for each dataset because a wrongly imposed prior forms an even worse representation than that of the kernel smoother. We validate the proposed architecture and its training on various experiments with time-series and image datasets. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2210.12363v1-abstract-full').style.display = 'none'; document.getElementById('2210.12363v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 22 October, 2022; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2022. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">13 pages, 7 figures</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2210.11153">arXiv:2210.11153</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2210.11153">pdf</a>, <a href="https://arxiv.org/format/2210.11153">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Image and Video Processing">eess.IV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> Reversed Image Signal Processing and RAW Reconstruction. AIM 2022 Challenge Report </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Conde%2C+M+V">Marcos V. Conde</a>, <a href="/search/cs?searchtype=author&amp;query=Timofte%2C+R">Radu Timofte</a>, <a href="/search/cs?searchtype=author&amp;query=Huang%2C+Y">Yibin Huang</a>, <a href="/search/cs?searchtype=author&amp;query=Peng%2C+J">Jingyang Peng</a>, <a href="/search/cs?searchtype=author&amp;query=Chen%2C+C">Chang Chen</a>, <a href="/search/cs?searchtype=author&amp;query=Li%2C+C">Cheng Li</a>, <a href="/search/cs?searchtype=author&amp;query=P%C3%A9rez-Pellitero%2C+E">Eduardo P茅rez-Pellitero</a>, <a href="/search/cs?searchtype=author&amp;query=Song%2C+F">Fenglong Song</a>, <a href="/search/cs?searchtype=author&amp;query=Bai%2C+F">Furui Bai</a>, <a href="/search/cs?searchtype=author&amp;query=Liu%2C+S">Shuai Liu</a>, <a href="/search/cs?searchtype=author&amp;query=Feng%2C+C">Chaoyu Feng</a>, <a href="/search/cs?searchtype=author&amp;query=Wang%2C+X">Xiaotao Wang</a>, <a href="/search/cs?searchtype=author&amp;query=Lei%2C+L">Lei Lei</a>, <a href="/search/cs?searchtype=author&amp;query=Zhu%2C+Y">Yu Zhu</a>, <a href="/search/cs?searchtype=author&amp;query=Li%2C+C">Chenghua Li</a>, <a href="/search/cs?searchtype=author&amp;query=Jiang%2C+Y">Yingying Jiang</a>, <a href="/search/cs?searchtype=author&amp;query=A%2C+Y">Yong A</a>, <a href="/search/cs?searchtype=author&amp;query=Wang%2C+P">Peisong Wang</a>, <a href="/search/cs?searchtype=author&amp;query=Leng%2C+C">Cong Leng</a>, <a href="/search/cs?searchtype=author&amp;query=Cheng%2C+J">Jian Cheng</a>, <a href="/search/cs?searchtype=author&amp;query=Liu%2C+X">Xiaoyu Liu</a>, <a href="/search/cs?searchtype=author&amp;query=Yin%2C+Z">Zhicun Yin</a>, <a href="/search/cs?searchtype=author&amp;query=Zhang%2C+Z">Zhilu Zhang</a>, <a href="/search/cs?searchtype=author&amp;query=Li%2C+J">Junyi Li</a>, <a href="/search/cs?searchtype=author&amp;query=Liu%2C+M">Ming Liu</a> , et al. 
(18 additional authors not shown) </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2210.11153v1-abstract-short" style="display: inline;"> Cameras capture sensor RAW images and transform them into pleasant RGB images, suitable for the human eyes, using their integrated Image Signal Processor (ISP). Numerous low-level vision tasks operate in the RAW domain (e.g. image denoising, white balance) due to its linear relationship with the scene irradiance, wide-range of information at 12bits, and sensor designs. Despite this, RAW image data&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2210.11153v1-abstract-full').style.display = 'inline'; document.getElementById('2210.11153v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2210.11153v1-abstract-full" style="display: none;"> Cameras capture sensor RAW images and transform them into pleasant RGB images, suitable for the human eyes, using their integrated Image Signal Processor (ISP). Numerous low-level vision tasks operate in the RAW domain (e.g. image denoising, white balance) due to its linear relationship with the scene irradiance, wide-range of information at 12bits, and sensor designs. Despite this, RAW image datasets are scarce and more expensive to collect than the already large and public RGB datasets. This paper introduces the AIM 2022 Challenge on Reversed Image Signal Processing and RAW Reconstruction. We aim to recover raw sensor images from the corresponding RGBs without metadata and, by doing this, &#34;reverse&#34; the ISP transformation. The proposed methods and benchmark establish the state-of-the-art for this low-level vision inverse problem, and generating realistic raw sensor readings can potentially benefit other tasks such as denoising and super-resolution. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2210.11153v1-abstract-full').style.display = 'none'; document.getElementById('2210.11153v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 20 October, 2022; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2022. 
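For intuition about what "reversing" an ISP involves, here is a hand-crafted sketch that assumes a simplified forward pipeline of white balance, gamma, and demosaicking; the challenge entries replace such fixed steps with learned networks.

```python
# Hand-crafted reverse-ISP sketch under a simplified pipeline assumption:
# undo display gamma, undo white-balance gains, remosaic to an RGGB CFA.
import numpy as np

def reverse_isp(rgb, gains=(2.0, 1.0, 1.8)):
    lin = np.clip(rgb, 0, 1) ** 2.2          # undo display gamma
    lin = lin / np.array(gains)              # undo white-balance gains
    raw = np.zeros(lin.shape[:2])            # remosaic to an RGGB Bayer CFA
    raw[0::2, 0::2] = lin[0::2, 0::2, 0]     # R
    raw[0::2, 1::2] = lin[0::2, 1::2, 1]     # G
    raw[1::2, 0::2] = lin[1::2, 0::2, 1]     # G
    raw[1::2, 1::2] = lin[1::2, 1::2, 2]     # B
    return raw

raw = reverse_isp(np.random.rand(64, 64, 3))
```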
</p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">ECCV 2022 Advances in Image Manipulation (AIM) workshop</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2210.07762">arXiv:2210.07762</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2210.07762">pdf</a>, <a href="https://arxiv.org/format/2210.07762">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> Controllable Style Transfer via Test-time Training of Implicit Neural Representation </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Kim%2C+S">Sunwoo Kim</a>, <a href="/search/cs?searchtype=author&amp;query=Min%2C+Y">Youngjo Min</a>, <a href="/search/cs?searchtype=author&amp;query=Jung%2C+Y">Younghun Jung</a>, <a href="/search/cs?searchtype=author&amp;query=Kim%2C+S">Seungryong Kim</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2210.07762v2-abstract-short" style="display: inline;"> We propose a controllable style transfer framework based on Implicit Neural Representation that pixel-wisely controls the stylized output via test-time training. Unlike traditional image optimization methods that often suffer from unstable convergence and learning-based methods that require intensive training and have limited generalization ability, we present a model optimization framework that o&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2210.07762v2-abstract-full').style.display = 'inline'; document.getElementById('2210.07762v2-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2210.07762v2-abstract-full" style="display: none;"> We propose a controllable style transfer framework based on Implicit Neural Representation that pixel-wisely controls the stylized output via test-time training. Unlike traditional image optimization methods that often suffer from unstable convergence and learning-based methods that require intensive training and have limited generalization ability, we present a model optimization framework that optimizes the neural networks during test-time with explicit loss functions for style transfer. After being test-time trained once, thanks to the flexibility of the INR-based model, our framework can precisely control the stylized images in a pixel-wise manner and freely adjust image resolution without further optimization or training. We demonstrate several applications. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2210.07762v2-abstract-full').style.display = 'none'; document.getElementById('2210.07762v2-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 17 October, 2022; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 14 October, 2022; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2022. 
</p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Project Page: https://ku-cvlab.github.io/INR-st/</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2209.06421">arXiv:2209.06421</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2209.06421">pdf</a>, <a href="https://arxiv.org/format/2209.06421">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Graphics">cs.GR</span> </div> </div> <p class="title is-5 mathjax"> A Transfer Function Design Using A Knowledge Database based on Deep Image and Primitive Intensity Profile Features Retrieval </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Jung%2C+Y">Younhyun Jung</a>, <a href="/search/cs?searchtype=author&amp;query=Kong%2C+J">Jim Kong</a>, <a href="/search/cs?searchtype=author&amp;query=Kim%2C+J">Jinman Kim</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2209.06421v1-abstract-short" style="display: inline;"> Transfer function (TF) plays a key role for the generation of direct volume rendering (DVR), by enabling accurate identification of structures of interest (SOIs) interactively as well as ensuring appropriate visibility of them. Attempts at mitigating the repetitive manual process of TF design have led to approaches that make use of a knowledge database consisting of pre-designed TFs by domain expe&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2209.06421v1-abstract-full').style.display = 'inline'; document.getElementById('2209.06421v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2209.06421v1-abstract-full" style="display: none;"> Transfer function (TF) plays a key role for the generation of direct volume rendering (DVR), by enabling accurate identification of structures of interest (SOIs) interactively as well as ensuring appropriate visibility of them. Attempts at mitigating the repetitive manual process of TF design have led to approaches that make use of a knowledge database consisting of pre-designed TFs by domain experts. In these approaches, a user navigates the knowledge database to find the most suitable pre-designed TF for their input volume to visualize the SOIs. Although these approaches potentially reduce the workload to generate the TFs, they, however, require manual TF navigation of the knowledge database, as well as the likely fine tuning of the selected TF to suit the input. In this work, we propose a TF design approach where we introduce a new content-based retrieval (CBR) to automatically navigate the knowledge database. Instead of pre-designed TFs, our knowledge database contains image volumes with SOI labels. Given an input image volume, our CBR approach retrieves relevant image volumes (with SOI labels) from the knowledge database; the retrieved labels are then used to generate and optimize TFs of the input. This approach does not need any manual TF navigation and fine tuning. 
For improving SOI retrieval performance, we propose a two-stage CBR scheme to enable the use of local intensity and regional deep image feature representations in a complementary manner. We demonstrate the capabilities of our approach with comparison to a conventional CBR approach in visualization, where an intensity profile matching algorithm is used, and also with potential use-cases in medical image volume visualization where DVR plays an indispensable role for different clinical usages. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2209.06421v1-abstract-full').style.display = 'none'; document.getElementById('2209.06421v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 14 September, 2022; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> September 2022. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">submitted to Computer Graphics Forum for review</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2207.14593">arXiv:2207.14593</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2207.14593">pdf</a>, <a href="https://arxiv.org/format/2207.14593">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Graphics">cs.GR</span> </div> <div class="is-inline-block" style="margin-left: 0.5rem"> <div class="tags has-addons"> <span class="tag is-dark is-size-7">doi</span> <span class="tag is-light is-size-7"><a class="" href="https://doi.org/10.1145/3528233.3530748">10.1145/3528233.3530748 <i class="fa fa-external-link" aria-hidden="true"></i></a></span> </div> </div> </div> <p class="title is-5 mathjax"> Deep Deformable 3D Caricatures with Learned Shape Control </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Jung%2C+Y">Yucheol Jung</a>, <a href="/search/cs?searchtype=author&amp;query=Jang%2C+W">Wonjong Jang</a>, <a href="/search/cs?searchtype=author&amp;query=Kim%2C+S">Soongjin Kim</a>, <a href="/search/cs?searchtype=author&amp;query=Yang%2C+J">Jiaolong Yang</a>, <a href="/search/cs?searchtype=author&amp;query=Tong%2C+X">Xin Tong</a>, <a href="/search/cs?searchtype=author&amp;query=Lee%2C+S">Seungyong Lee</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2207.14593v1-abstract-short" style="display: inline;"> A 3D caricature is an exaggerated 3D depiction of a human face. The goal of this paper is to model the variations of 3D caricatures in a compact parameter space so that we can provide a useful data-driven toolkit for handling 3D caricature deformations. 
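A two-stage CBR ranking of this kind can be sketched with plain vector similarities, assuming each database volume is summarized by a regional deep feature (stage 1) and a primitive intensity profile (stage 2). All data and dimensions here are illustrative.

```python
# Two-stage content-based retrieval sketch: shortlist by deep-feature
# similarity, then re-rank the shortlist by intensity-profile similarity.
import numpy as np

rng = np.random.default_rng(0)
db_deep = rng.normal(size=(50, 128))     # deep features of 50 volumes
db_prof = rng.normal(size=(50, 64))      # primitive intensity profiles
q_deep, q_prof = rng.normal(size=128), rng.normal(size=64)

def cos(q, M):                           # cosine similarity of q to rows of M
    return (M @ q) / (np.linalg.norm(M, axis=1) * np.linalg.norm(q) + 1e-8)

shortlist = np.argsort(-cos(q_deep, db_deep))[:10]          # stage 1
ranked = shortlist[np.argsort(-cos(q_prof, db_prof[shortlist]))]  # stage 2
```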
arXiv:2207.14593  [pdf, other]  cs.CV  cs.GR  doi: 10.1145/3528233.3530748
Deep Deformable 3D Caricatures with Learned Shape Control
Authors: Yucheol Jung, Wonjong Jang, Soongjin Kim, Jiaolong Yang, Xin Tong, Seungyong Lee
Abstract: A 3D caricature is an exaggerated 3D depiction of a human face. The goal of this paper is to model the variations of 3D caricatures in a compact parameter space so that we can provide a useful data-driven toolkit for handling 3D caricature deformations. To achieve this goal, we propose an MLP-based framework for building a deformable surface model, which takes a latent code and produces a 3D surface. In the framework, a SIREN MLP models a function that takes a 3D position on a fixed template surface and returns a 3D displacement vector for the input position. We create variations of 3D surfaces by learning a hypernetwork that takes a latent code and produces the parameters of the MLP. Once learned, our deformable model provides a convenient editing space for 3D caricatures, supporting label-based semantic editing and point-handle-based deformation, both of which produce highly exaggerated yet natural 3D caricature shapes. We also demonstrate other applications of our deformable model, such as automatic 3D caricature creation.
Submitted 29 July, 2022; originally announced July 2022.
Comments: ACM SIGGRAPH 2022. For the project page, see https://ycjungsubhuman.github.io/DeepDeformable3DCaricatures
ACM Class: I.3.4; I.2.6
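The hypernetwork-plus-coordinate-MLP construction maps directly to code. In the sketch below, a hypernetwork turns a latent code into the weights of a small MLP that maps template-surface positions to displacement vectors; a sine activation stands in for the SIREN layers, and all sizes are illustrative.

```python
# Hypernetwork sketch: a latent code is decoded into the parameters of
# a small position -> displacement MLP applied to a fixed template.
import torch
import torch.nn as nn

latent_dim, hidden = 16, 32
# weights + biases for a 3 -> hidden -> 3 displacement MLP
n_params = (3 * hidden + hidden) + (hidden * 3 + 3)
hyper = nn.Sequential(nn.Linear(latent_dim, 128), nn.ReLU(),
                      nn.Linear(128, n_params))

def displace(points, z):                  # points: (N, 3), z: (latent_dim,)
    p = hyper(z)
    w1 = p[: 3 * hidden].view(hidden, 3)
    b1 = p[3 * hidden: 3 * hidden + hidden]
    rest = p[3 * hidden + hidden:]
    w2 = rest[: hidden * 3].view(3, hidden)
    b2 = rest[hidden * 3:]
    h = torch.sin(points @ w1.T + b1)     # SIREN-style sine activation
    return points + (h @ w2.T + b2)       # template + predicted displacement

template = torch.rand(1000, 3)            # fixed template surface samples
new_face = displace(template, torch.randn(latent_dim))
```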
</p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Page 3, Added reference [2], [33]</span> </p> </li> </ol> <nav class="pagination is-small is-centered breathe-horizontal" role="navigation" aria-label="pagination"> <a href="" class="pagination-previous is-invisible">Previous </a> <a href="/search/?searchtype=author&amp;query=Jung%2C+Y&amp;start=50" class="pagination-next" >Next </a> <ul class="pagination-list"> <li> <a href="/search/?searchtype=author&amp;query=Jung%2C+Y&amp;start=0" class="pagination-link is-current" aria-label="Goto page 1">1 </a> </li> <li> <a href="/search/?searchtype=author&amp;query=Jung%2C+Y&amp;start=50" class="pagination-link " aria-label="Page 2" aria-current="page">2 </a> </li> <li> <a href="/search/?searchtype=author&amp;query=Jung%2C+Y&amp;start=100" class="pagination-link " aria-label="Page 3" aria-current="page">3 </a> </li> </ul> </nav> <div class="is-hidden-tablet"> <!-- feedback for mobile only --> <span class="help" style="display: inline-block;"><a href="https://github.com/arXiv/arxiv-search/releases">Search v0.5.6 released 2020-02-24</a>&nbsp;&nbsp;</span> </div> </div> </main> <footer> <div class="columns is-desktop" role="navigation" aria-label="Secondary"> <!-- MetaColumn 1 --> <div class="column"> <div class="columns"> <div class="column"> <ul class="nav-spaced"> <li><a href="https://info.arxiv.org/about">About</a></li> <li><a href="https://info.arxiv.org/help">Help</a></li> </ul> </div> <div class="column"> <ul class="nav-spaced"> <li> <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 512 512" class="icon filter-black" role="presentation"><title>contact arXiv</title><desc>Click here to contact arXiv</desc><path d="M502.3 190.8c3.9-3.1 9.7-.2 9.7 4.7V400c0 26.5-21.5 48-48 48H48c-26.5 0-48-21.5-48-48V195.6c0-5 5.7-7.8 9.7-4.7 22.4 17.4 52.1 39.5 154.1 113.6 21.1 15.4 56.7 47.8 92.2 47.6 35.7.3 72-32.8 92.3-47.6 102-74.1 131.6-96.3 154-113.7zM256 320c23.2.4 56.6-29.2 73.4-41.4 132.7-96.3 142.8-104.7 173.4-128.7 5.8-4.5 9.2-11.5 9.2-18.9v-19c0-26.5-21.5-48-48-48H48C21.5 64 0 85.5 0 112v19c0 7.4 3.4 14.3 9.2 18.9 30.6 23.9 40.7 32.4 173.4 128.7 16.8 12.2 50.2 41.8 73.4 41.4z"/></svg> <a href="https://info.arxiv.org/help/contact.html"> Contact</a> </li> <li> <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 512 512" class="icon filter-black" role="presentation"><title>subscribe to arXiv mailings</title><desc>Click here to subscribe</desc><path d="M476 3.2L12.5 270.6c-18.1 10.4-15.8 35.6 2.2 43.2L121 358.4l287.3-253.2c5.5-4.9 13.3 2.6 8.6 8.3L176 407v80.5c0 23.6 28.5 32.9 42.5 15.8L282 426l124.6 52.2c14.2 6 30.4-2.9 33-18.2l72-432C515 7.8 493.3-6.8 476 3.2z"/></svg> <a href="https://info.arxiv.org/help/subscribe"> Subscribe</a> </li> </ul> </div> </div> </div> <!-- end MetaColumn 1 --> <!-- MetaColumn 2 --> <div class="column"> <div class="columns"> <div class="column"> <ul class="nav-spaced"> <li><a href="https://info.arxiv.org/help/license/index.html">Copyright</a></li> <li><a href="https://info.arxiv.org/help/policies/privacy_policy.html">Privacy Policy</a></li> </ul> </div> <div class="column sorry-app-links"> <ul class="nav-spaced"> <li><a href="https://info.arxiv.org/help/web_accessibility.html">Web Accessibility Assistance</a></li> <li> <p class="help"> <a class="a11y-main-link" href="https://status.arxiv.org" target="_blank">arXiv Operational Status <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 256 512" class="icon filter-dark_grey" 
role="presentation"><path d="M224.3 273l-136 136c-9.4 9.4-24.6 9.4-33.9 0l-22.6-22.6c-9.4-9.4-9.4-24.6 0-33.9l96.4-96.4-96.4-96.4c-9.4-9.4-9.4-24.6 0-33.9L54.3 103c9.4-9.4 24.6-9.4 33.9 0l136 136c9.5 9.4 9.5 24.6.1 34z"/></svg></a><br> Get status notifications via <a class="is-link" href="https://subscribe.sorryapp.com/24846f03/email/new" target="_blank"><svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 512 512" class="icon filter-black" role="presentation"><path d="M502.3 190.8c3.9-3.1 9.7-.2 9.7 4.7V400c0 26.5-21.5 48-48 48H48c-26.5 0-48-21.5-48-48V195.6c0-5 5.7-7.8 9.7-4.7 22.4 17.4 52.1 39.5 154.1 113.6 21.1 15.4 56.7 47.8 92.2 47.6 35.7.3 72-32.8 92.3-47.6 102-74.1 131.6-96.3 154-113.7zM256 320c23.2.4 56.6-29.2 73.4-41.4 132.7-96.3 142.8-104.7 173.4-128.7 5.8-4.5 9.2-11.5 9.2-18.9v-19c0-26.5-21.5-48-48-48H48C21.5 64 0 85.5 0 112v19c0 7.4 3.4 14.3 9.2 18.9 30.6 23.9 40.7 32.4 173.4 128.7 16.8 12.2 50.2 41.8 73.4 41.4z"/></svg>email</a> or <a class="is-link" href="https://subscribe.sorryapp.com/24846f03/slack/new" target="_blank"><svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 448 512" class="icon filter-black" role="presentation"><path d="M94.12 315.1c0 25.9-21.16 47.06-47.06 47.06S0 341 0 315.1c0-25.9 21.16-47.06 47.06-47.06h47.06v47.06zm23.72 0c0-25.9 21.16-47.06 47.06-47.06s47.06 21.16 47.06 47.06v117.84c0 25.9-21.16 47.06-47.06 47.06s-47.06-21.16-47.06-47.06V315.1zm47.06-188.98c-25.9 0-47.06-21.16-47.06-47.06S139 32 164.9 32s47.06 21.16 47.06 47.06v47.06H164.9zm0 23.72c25.9 0 47.06 21.16 47.06 47.06s-21.16 47.06-47.06 47.06H47.06C21.16 243.96 0 222.8 0 196.9s21.16-47.06 47.06-47.06H164.9zm188.98 47.06c0-25.9 21.16-47.06 47.06-47.06 25.9 0 47.06 21.16 47.06 47.06s-21.16 47.06-47.06 47.06h-47.06V196.9zm-23.72 0c0 25.9-21.16 47.06-47.06 47.06-25.9 0-47.06-21.16-47.06-47.06V79.06c0-25.9 21.16-47.06 47.06-47.06 25.9 0 47.06 21.16 47.06 47.06V196.9zM283.1 385.88c25.9 0 47.06 21.16 47.06 47.06 0 25.9-21.16 47.06-47.06 47.06-25.9 0-47.06-21.16-47.06-47.06v-47.06h47.06zm0-23.72c-25.9 0-47.06-21.16-47.06-47.06 0-25.9 21.16-47.06 47.06-47.06h117.84c25.9 0 47.06 21.16 47.06 47.06 0 25.9-21.16 47.06-47.06 47.06H283.1z"/></svg>slack</a> </p> </li> </ul> </div> </div> </div> <!-- end MetaColumn 2 --> </div> </footer> <script src="https://static.arxiv.org/static/base/1.0.0a5/js/member_acknowledgement.js"></script> </body> </html>
