CINXE.COM

Search | arXiv e-print repository

<!DOCTYPE html> <html lang="en"> <head> <meta charset="utf-8"/> <meta name="viewport" content="width=device-width, initial-scale=1"/> <!-- new favicon config and versions by realfavicongenerator.net --> <link rel="apple-touch-icon" sizes="180x180" href="https://static.arxiv.org/static/base/1.0.0a5/images/icons/apple-touch-icon.png"> <link rel="icon" type="image/png" sizes="32x32" href="https://static.arxiv.org/static/base/1.0.0a5/images/icons/favicon-32x32.png"> <link rel="icon" type="image/png" sizes="16x16" href="https://static.arxiv.org/static/base/1.0.0a5/images/icons/favicon-16x16.png"> <link rel="manifest" href="https://static.arxiv.org/static/base/1.0.0a5/images/icons/site.webmanifest"> <link rel="mask-icon" href="https://static.arxiv.org/static/base/1.0.0a5/images/icons/safari-pinned-tab.svg" color="#b31b1b"> <link rel="shortcut icon" href="https://static.arxiv.org/static/base/1.0.0a5/images/icons/favicon.ico"> <meta name="msapplication-TileColor" content="#b31b1b"> <meta name="msapplication-config" content="images/icons/browserconfig.xml"> <meta name="theme-color" content="#b31b1b"> <!-- end favicon config --> <title>Search | arXiv e-print repository</title> <script defer src="https://static.arxiv.org/static/base/1.0.0a5/fontawesome-free-5.11.2-web/js/all.js"></script> <link rel="stylesheet" href="https://static.arxiv.org/static/base/1.0.0a5/css/arxivstyle.css" /> <script type="text/x-mathjax-config"> MathJax.Hub.Config({ messageStyle: "none", extensions: ["tex2jax.js"], jax: ["input/TeX", "output/HTML-CSS"], tex2jax: { inlineMath: [ ['$','$'], ["\\(","\\)"] ], displayMath: [ ['$$','$$'], ["\\[","\\]"] ], processEscapes: true, ignoreClass: '.*', processClass: 'mathjax.*' }, TeX: { extensions: ["AMSmath.js", "AMSsymbols.js", "noErrors.js"], noErrors: { inlineDelimiters: ["$","$"], multiLine: false, style: { "font-size": "normal", "border": "" } } }, "HTML-CSS": { availableFonts: ["TeX"] } }); </script> <script src='//static.arxiv.org/MathJax-2.7.3/MathJax.js'></script> <script src="https://static.arxiv.org/static/base/1.0.0a5/js/notification.js"></script> <link rel="stylesheet" href="https://static.arxiv.org/static/search/0.5.6/css/bulma-tooltip.min.css" /> <link rel="stylesheet" href="https://static.arxiv.org/static/search/0.5.6/css/search.css" /> <script src="https://code.jquery.com/jquery-3.2.1.slim.min.js" integrity="sha256-k2WSCIexGzOj3Euiig+TlR8gA0EmPjuc79OEeY5L45g=" crossorigin="anonymous"></script> <script src="https://static.arxiv.org/static/search/0.5.6/js/fieldset.js"></script> <style> radio#cf-customfield_11400 { display: none; } </style> </head> <body> <header><a href="#main-container" class="is-sr-only">Skip to main content</a> <!-- contains Cornell logo and sponsor statement --> <div class="attribution level is-marginless" role="banner"> <div class="level-left"> <a class="level-item" href="https://cornell.edu/"><img src="https://static.arxiv.org/static/base/1.0.0a5/images/cornell-reduced-white-SMALL.svg" alt="Cornell University" width="200" aria-label="logo" /></a> </div> <div class="level-right is-marginless"><p class="sponsors level-item is-marginless"><span id="support-ack-url">We gratefully acknowledge support from<br /> the Simons Foundation, <a href="https://info.arxiv.org/about/ourmembers.html">member institutions</a>, and all contributors. <a href="https://info.arxiv.org/about/donate.html">Donate</a></span></p></div> </div> <!-- contains arXiv identity and search bar --> <div class="identity level is-marginless"> <div class="level-left"> <div class="level-item"> <a class="arxiv" href="https://arxiv.org/" aria-label="arxiv-logo"> <img src="https://static.arxiv.org/static/base/1.0.0a5/images/arxiv-logo-one-color-white.svg" aria-label="logo" alt="arxiv logo" width="85" style="width:85px;"/> </a> </div> </div> <div class="search-block level-right"> <form class="level-item mini-search" method="GET" action="https://arxiv.org/search"> <div class="field has-addons"> <div class="control"> <input class="input is-small" type="text" name="query" placeholder="Search..." aria-label="Search term or terms" /> <p class="help"><a href="https://info.arxiv.org/help">Help</a> | <a href="https://arxiv.org/search/advanced">Advanced Search</a></p> </div> <div class="control"> <div class="select is-small"> <select name="searchtype" aria-label="Field to search"> <option value="all" selected="selected">All fields</option> <option value="title">Title</option> <option value="author">Author</option> <option value="abstract">Abstract</option> <option value="comments">Comments</option> <option value="journal_ref">Journal reference</option> <option value="acm_class">ACM classification</option> <option value="msc_class">MSC classification</option> <option value="report_num">Report number</option> <option value="paper_id">arXiv identifier</option> <option value="doi">DOI</option> <option value="orcid">ORCID</option> <option value="author_id">arXiv author ID</option> <option value="help">Help pages</option> <option value="full_text">Full text</option> </select> </div> </div> <input type="hidden" name="source" value="header"> <button class="button is-small is-cul-darker">Search</button> </div> </form> </div> </div> <!-- closes identity --> <div class="container"> <div class="user-tools is-size-7 has-text-right has-text-weight-bold" role="navigation" aria-label="User menu"> <a href="https://arxiv.org/login">Login</a> </div> </div> </header> <main class="container" id="main-container"> <div class="level is-marginless"> <div class="level-left"> <h1 class="title is-clearfix"> Showing 1&ndash;50 of 125 results for author: <span class="mathjax">Truong, T</span> </h1> </div> <div class="level-right is-hidden-mobile"> <!-- feedback for mobile is moved to footer --> <span class="help" style="display: inline-block;"><a href="https://github.com/arXiv/arxiv-search/releases">Search v0.5.6 released 2020-02-24</a>&nbsp;&nbsp;</span> </div> </div> <div class="content"> <form method="GET" action="/search/cs" aria-role="search"> Searching in archive <strong>cs</strong>. <a href="/search/?searchtype=author&amp;query=Truong%2C+T">Search in all archives.</a> <div class="field has-addons-tablet"> <div class="control is-expanded"> <label for="query" class="hidden-label">Search term or terms</label> <input class="input is-medium" id="query" name="query" placeholder="Search term..." type="text" value="Truong, T"> </div> <div class="select control is-medium"> <label class="is-hidden" for="searchtype">Field</label> <select class="is-medium" id="searchtype" name="searchtype"><option value="all">All fields</option><option value="title">Title</option><option selected value="author">Author(s)</option><option value="abstract">Abstract</option><option value="comments">Comments</option><option value="journal_ref">Journal reference</option><option value="acm_class">ACM classification</option><option value="msc_class">MSC classification</option><option value="report_num">Report number</option><option value="paper_id">arXiv identifier</option><option value="doi">DOI</option><option value="orcid">ORCID</option><option value="license">License (URI)</option><option value="author_id">arXiv author ID</option><option value="help">Help pages</option><option value="full_text">Full text</option></select> </div> <div class="control"> <button class="button is-link is-medium">Search</button> </div> </div> <div class="field"> <div class="control is-size-7"> <label class="radio"> <input checked id="abstracts-0" name="abstracts" type="radio" value="show"> Show abstracts </label> <label class="radio"> <input id="abstracts-1" name="abstracts" type="radio" value="hide"> Hide abstracts </label> </div> </div> <div class="is-clearfix" style="height: 2.5em"> <div class="is-pulled-right"> <a href="/search/advanced?terms-0-term=Truong%2C+T&amp;terms-0-field=author&amp;size=50&amp;order=-announced_date_first">Advanced Search</a> </div> </div> <input type="hidden" name="order" value="-announced_date_first"> <input type="hidden" name="size" value="50"> </form> <div class="level breathe-horizontal"> <div class="level-left"> <form method="GET" action="/search/"> <div style="display: none;"> <select id="searchtype" name="searchtype"><option value="all">All fields</option><option value="title">Title</option><option selected value="author">Author(s)</option><option value="abstract">Abstract</option><option value="comments">Comments</option><option value="journal_ref">Journal reference</option><option value="acm_class">ACM classification</option><option value="msc_class">MSC classification</option><option value="report_num">Report number</option><option value="paper_id">arXiv identifier</option><option value="doi">DOI</option><option value="orcid">ORCID</option><option value="license">License (URI)</option><option value="author_id">arXiv author ID</option><option value="help">Help pages</option><option value="full_text">Full text</option></select> <input id="query" name="query" type="text" value="Truong, T"> <ul id="abstracts"><li><input checked id="abstracts-0" name="abstracts" type="radio" value="show"> <label for="abstracts-0">Show abstracts</label></li><li><input id="abstracts-1" name="abstracts" type="radio" value="hide"> <label for="abstracts-1">Hide abstracts</label></li></ul> </div> <div class="box field is-grouped is-grouped-multiline level-item"> <div class="control"> <span class="select is-small"> <select id="size" name="size"><option value="25">25</option><option selected value="50">50</option><option value="100">100</option><option value="200">200</option></select> </span> <label for="size">results per page</label>. </div> <div class="control"> <label for="order">Sort results by</label> <span class="select is-small"> <select id="order" name="order"><option selected value="-announced_date_first">Announcement date (newest first)</option><option value="announced_date_first">Announcement date (oldest first)</option><option value="-submitted_date">Submission date (newest first)</option><option value="submitted_date">Submission date (oldest first)</option><option value="">Relevance</option></select> </span> </div> <div class="control"> <button class="button is-small is-link">Go</button> </div> </div> </form> </div> </div> <nav class="pagination is-small is-centered breathe-horizontal" role="navigation" aria-label="pagination"> <a href="" class="pagination-previous is-invisible">Previous </a> <a href="/search/?searchtype=author&amp;query=Truong%2C+T&amp;start=50" class="pagination-next" >Next </a> <ul class="pagination-list"> <li> <a href="/search/?searchtype=author&amp;query=Truong%2C+T&amp;start=0" class="pagination-link is-current" aria-label="Goto page 1">1 </a> </li> <li> <a href="/search/?searchtype=author&amp;query=Truong%2C+T&amp;start=50" class="pagination-link " aria-label="Page 2" aria-current="page">2 </a> </li> <li> <a href="/search/?searchtype=author&amp;query=Truong%2C+T&amp;start=100" class="pagination-link " aria-label="Page 3" aria-current="page">3 </a> </li> </ul> </nav> <ol class="breathe-horizontal" start="1"> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2410.21932">arXiv:2410.21932</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2410.21932">pdf</a>, <a href="https://arxiv.org/format/2410.21932">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Image and Video Processing">eess.IV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> CT to PET Translation: A Large-scale Dataset and Domain-Knowledge-Guided Diffusion Approach </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Nguyen%2C+D+T">Dac Thai Nguyen</a>, <a href="/search/cs?searchtype=author&amp;query=Nguyen%2C+T+T">Trung Thanh Nguyen</a>, <a href="/search/cs?searchtype=author&amp;query=Nguyen%2C+H+T">Huu Tien Nguyen</a>, <a href="/search/cs?searchtype=author&amp;query=Nguyen%2C+T+T">Thanh Trung Nguyen</a>, <a href="/search/cs?searchtype=author&amp;query=Pham%2C+H+H">Huy Hieu Pham</a>, <a href="/search/cs?searchtype=author&amp;query=Nguyen%2C+T+H">Thanh Hung Nguyen</a>, <a href="/search/cs?searchtype=author&amp;query=Truong%2C+T+N">Thao Nguyen Truong</a>, <a href="/search/cs?searchtype=author&amp;query=Nguyen%2C+P+L">Phi Le Nguyen</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2410.21932v1-abstract-short" style="display: inline;"> Positron Emission Tomography (PET) and Computed Tomography (CT) are essential for diagnosing, staging, and monitoring various diseases, particularly cancer. Despite their importance, the use of PET/CT systems is limited by the necessity for radioactive materials, the scarcity of PET scanners, and the high cost associated with PET imaging. In contrast, CT scanners are more widely available and sign&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.21932v1-abstract-full').style.display = 'inline'; document.getElementById('2410.21932v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2410.21932v1-abstract-full" style="display: none;"> Positron Emission Tomography (PET) and Computed Tomography (CT) are essential for diagnosing, staging, and monitoring various diseases, particularly cancer. Despite their importance, the use of PET/CT systems is limited by the necessity for radioactive materials, the scarcity of PET scanners, and the high cost associated with PET imaging. In contrast, CT scanners are more widely available and significantly less expensive. In response to these challenges, our study addresses the issue of generating PET images from CT images, aiming to reduce both the medical examination cost and the associated health risks for patients. Our contributions are twofold: First, we introduce a conditional diffusion model named CPDM, which, to our knowledge, is one of the initial attempts to employ a diffusion model for translating from CT to PET images. Second, we provide the largest CT-PET dataset to date, comprising 2,028,628 paired CT-PET images, which facilitates the training and evaluation of CT-to-PET translation models. For the CPDM model, we incorporate domain knowledge to develop two conditional maps: the Attention map and the Attenuation map. The former helps the diffusion process focus on areas of interest, while the latter improves PET data correction and ensures accurate diagnostic information. Experimental evaluations across various benchmarks demonstrate that CPDM surpasses existing methods in generating high-quality PET images in terms of multiple metrics. The source code and data samples are available at https://github.com/thanhhff/CPDM. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.21932v1-abstract-full').style.display = 'none'; document.getElementById('2410.21932v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 29 October, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">IEEE/CVF Winter Conference on Applications of Computer Vision (WACV) 2025</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2410.09803">arXiv:2410.09803</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2410.09803">pdf</a>, <a href="https://arxiv.org/format/2410.09803">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Robotics">cs.RO</span> </div> </div> <p class="title is-5 mathjax"> Socially Aware Motion Planning for Service Robots Using LiDAR and RGB-D Camera </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Nguyen%2C+D+P">Duc Phu Nguyen</a>, <a href="/search/cs?searchtype=author&amp;query=Nguyen%2C+T+L">Thanh Long Nguyen</a>, <a href="/search/cs?searchtype=author&amp;query=Tu%2C+M+D">Minh Dang Tu</a>, <a href="/search/cs?searchtype=author&amp;query=Quach%2C+C+H">Cong Hoang Quach</a>, <a href="/search/cs?searchtype=author&amp;query=Truong%2C+X+T">Xuan Tung Truong</a>, <a href="/search/cs?searchtype=author&amp;query=Phung%2C+M+D">Manh Duong Phung</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2410.09803v1-abstract-short" style="display: inline;"> Service robots that work alongside humans in a shared environment need a navigation system that takes into account not only physical safety but also social norms for mutual cooperation. In this paper, we introduce a motion planning system that includes human states such as positions and velocities and their personal space for social-aware navigation. The system first extracts human positions from&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.09803v1-abstract-full').style.display = 'inline'; document.getElementById('2410.09803v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2410.09803v1-abstract-full" style="display: none;"> Service robots that work alongside humans in a shared environment need a navigation system that takes into account not only physical safety but also social norms for mutual cooperation. In this paper, we introduce a motion planning system that includes human states such as positions and velocities and their personal space for social-aware navigation. The system first extracts human positions from the LiDAR and the RGB-D camera. It then uses the Kalman filter to fuse that information for human state estimation. An asymmetric Gaussian function is then employed to model human personal space based on their states. This model is used as the input to the dynamic window approach algorithm to generate trajectories for the robot. Experiments show that the robot is able to navigate alongside humans in a dynamic environment while respecting their physical and psychological comfort. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.09803v1-abstract-full').style.display = 'none'; document.getElementById('2410.09803v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 13 October, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">In Proceedings of 2024, the 7th International Conference on Control, Robotics and Informatics (ICCRI 2024)</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2410.08229">arXiv:2410.08229</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2410.08229">pdf</a>, <a href="https://arxiv.org/format/2410.08229">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Neural and Evolutionary Computing">cs.NE</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Image and Video Processing">eess.IV</span> </div> </div> <p class="title is-5 mathjax"> Improvement of Spiking Neural Network with Bit Planes and Color Models </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Luu%2C+N+T">Nhan T. Luu</a>, <a href="/search/cs?searchtype=author&amp;query=Luu%2C+D+T">Duong T. Luu</a>, <a href="/search/cs?searchtype=author&amp;query=Pham%2C+N+N">Nam N. Pham</a>, <a href="/search/cs?searchtype=author&amp;query=Truong%2C+T+C">Thang C. Truong</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2410.08229v2-abstract-short" style="display: inline;"> Spiking neural network (SNN) has emerged as a promising paradigm in computational neuroscience and artificial intelligence, offering advantages such as low energy consumption and small memory footprint. However, their practical adoption is constrained by several challenges, prominently among them being performance optimization. In this study, we present a novel approach to enhance the performance&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.08229v2-abstract-full').style.display = 'inline'; document.getElementById('2410.08229v2-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2410.08229v2-abstract-full" style="display: none;"> Spiking neural network (SNN) has emerged as a promising paradigm in computational neuroscience and artificial intelligence, offering advantages such as low energy consumption and small memory footprint. However, their practical adoption is constrained by several challenges, prominently among them being performance optimization. In this study, we present a novel approach to enhance the performance of SNN for images through a new coding method that exploits bit plane representation. Our proposed technique is designed to improve the accuracy of SNN without increasing model size. Also, we investigate the impacts of color models of the proposed coding process. Through extensive experimental validation, we demonstrate the effectiveness of our coding strategy in achieving performance gain across multiple datasets. To the best of our knowledge, this is the first research that considers bit planes and color models in the context of SNN. By leveraging the unique characteristics of bit planes, we hope to unlock new potentials in SNNs performance, potentially paving the way for more efficient and effective SNNs models in future researches and applications. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.08229v2-abstract-full').style.display = 'none'; document.getElementById('2410.08229v2-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 8 November, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 28 September, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2410.04327">arXiv:2410.04327</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2410.04327">pdf</a>, <a href="https://arxiv.org/ps/2410.04327">ps</a>, <a href="https://arxiv.org/format/2410.04327">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> Leveraging Hierarchical Taxonomies in Prompt-based Continual Learning </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Tran%2C+Q">Quyen Tran</a>, <a href="/search/cs?searchtype=author&amp;query=Phan%2C+H">Hoang Phan</a>, <a href="/search/cs?searchtype=author&amp;query=Le%2C+M">Minh Le</a>, <a href="/search/cs?searchtype=author&amp;query=Truong%2C+T">Tuan Truong</a>, <a href="/search/cs?searchtype=author&amp;query=Phung%2C+D">Dinh Phung</a>, <a href="/search/cs?searchtype=author&amp;query=Ngo%2C+L">Linh Ngo</a>, <a href="/search/cs?searchtype=author&amp;query=Nguyen%2C+T">Thien Nguyen</a>, <a href="/search/cs?searchtype=author&amp;query=Ho%2C+N">Nhat Ho</a>, <a href="/search/cs?searchtype=author&amp;query=Le%2C+T">Trung Le</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2410.04327v2-abstract-short" style="display: inline;"> Drawing inspiration from human learning behaviors, this work proposes a novel approach to mitigate catastrophic forgetting in Prompt-based Continual Learning models by exploiting the relationships between continuously emerging class data. We find that applying human habits of organizing and connecting information can serve as an efficient strategy when training deep learning models. Specifically,&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.04327v2-abstract-full').style.display = 'inline'; document.getElementById('2410.04327v2-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2410.04327v2-abstract-full" style="display: none;"> Drawing inspiration from human learning behaviors, this work proposes a novel approach to mitigate catastrophic forgetting in Prompt-based Continual Learning models by exploiting the relationships between continuously emerging class data. We find that applying human habits of organizing and connecting information can serve as an efficient strategy when training deep learning models. Specifically, by building a hierarchical tree structure based on the expanding set of labels, we gain fresh insights into the data, identifying groups of similar classes could easily cause confusion. Additionally, we delve deeper into the hidden connections between classes by exploring the original pretrained model&#39;s behavior through an optimal transport-based approach. From these insights, we propose a novel regularization loss function that encourages models to focus more on challenging knowledge areas, thereby enhancing overall performance. Experimentally, our method demonstrated significant superiority over the most robust state-of-the-art models on various benchmarks. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.04327v2-abstract-full').style.display = 'none'; document.getElementById('2410.04327v2-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 20 November, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 5 October, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2410.04196">arXiv:2410.04196</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2410.04196">pdf</a>, <a href="https://arxiv.org/format/2410.04196">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">stat.ML</span> </div> </div> <p class="title is-5 mathjax"> Improving Generalization with Flat Hilbert Bayesian Inference </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Truong%2C+T">Tuan Truong</a>, <a href="/search/cs?searchtype=author&amp;query=Tran%2C+Q">Quyen Tran</a>, <a href="/search/cs?searchtype=author&amp;query=Pham-Ngoc%2C+Q">Quan Pham-Ngoc</a>, <a href="/search/cs?searchtype=author&amp;query=Ho%2C+N">Nhat Ho</a>, <a href="/search/cs?searchtype=author&amp;query=Phung%2C+D">Dinh Phung</a>, <a href="/search/cs?searchtype=author&amp;query=Le%2C+T">Trung Le</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2410.04196v1-abstract-short" style="display: inline;"> We introduce Flat Hilbert Bayesian Inference (FHBI), an algorithm designed to enhance generalization in Bayesian inference. Our approach involves an iterative two-step procedure with an adversarial functional perturbation step and a functional descent step within the reproducing kernel Hilbert spaces. This methodology is supported by a theoretical analysis that extends previous findings on general&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.04196v1-abstract-full').style.display = 'inline'; document.getElementById('2410.04196v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2410.04196v1-abstract-full" style="display: none;"> We introduce Flat Hilbert Bayesian Inference (FHBI), an algorithm designed to enhance generalization in Bayesian inference. Our approach involves an iterative two-step procedure with an adversarial functional perturbation step and a functional descent step within the reproducing kernel Hilbert spaces. This methodology is supported by a theoretical analysis that extends previous findings on generalization ability from finite-dimensional Euclidean spaces to infinite-dimensional functional spaces. To evaluate the effectiveness of FHBI, we conduct comprehensive comparisons against seven baseline methods on the VTAB-1K benchmark, which encompasses 19 diverse datasets across various domains with diverse semantics. Empirical results demonstrate that FHBI consistently outperforms the baselines by notable margins, highlighting its practical efficacy. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.04196v1-abstract-full').style.display = 'none'; document.getElementById('2410.04196v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 5 October, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2409.14435">arXiv:2409.14435</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2409.14435">pdf</a>, <a href="https://arxiv.org/format/2409.14435">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Robotics">cs.RO</span> </div> </div> <p class="title is-5 mathjax"> Adaptive Compensation for Robotic Joint Failures Using Partially Observable Reinforcement Learning </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Pham%2C+T">Tan-Hanh Pham</a>, <a href="/search/cs?searchtype=author&amp;query=Aikins%2C+G">Godwyll Aikins</a>, <a href="/search/cs?searchtype=author&amp;query=Truong%2C+T">Tri Truong</a>, <a href="/search/cs?searchtype=author&amp;query=Nguyen%2C+K">Kim-Doang Nguyen</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2409.14435v1-abstract-short" style="display: inline;"> Robotic manipulators are widely used in various industries for complex and repetitive tasks. However, they remain vulnerable to unexpected hardware failures. In this study, we address the challenge of enabling a robotic manipulator to complete tasks despite joint malfunctions. Specifically, we develop a reinforcement learning (RL) framework to adaptively compensate for a non-functional joint durin&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2409.14435v1-abstract-full').style.display = 'inline'; document.getElementById('2409.14435v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2409.14435v1-abstract-full" style="display: none;"> Robotic manipulators are widely used in various industries for complex and repetitive tasks. However, they remain vulnerable to unexpected hardware failures. In this study, we address the challenge of enabling a robotic manipulator to complete tasks despite joint malfunctions. Specifically, we develop a reinforcement learning (RL) framework to adaptively compensate for a non-functional joint during task execution. Our experimental platform is the Franka robot with 7 degrees of freedom (DOFs). We formulate the problem as a partially observable Markov decision process (POMDP), where the robot is trained under various joint failure conditions and tested in both seen and unseen scenarios. We consider scenarios where a joint is permanently broken and where it functions intermittently. Additionally, we demonstrate the effectiveness of our approach by comparing it with traditional inverse kinematics-based control methods. The results show that the RL algorithm enables the robot to successfully complete tasks even with joint failures, achieving a high success rate with an average rate of 93.6%. This showcases its robustness and adaptability. Our findings highlight the potential of RL to enhance the resilience and reliability of robotic systems, making them better suited for unpredictable environments. All related codes and models are published online. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2409.14435v1-abstract-full').style.display = 'none'; document.getElementById('2409.14435v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 22 September, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> September 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">15 pages</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2409.13945">arXiv:2409.13945</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2409.13945">pdf</a>, <a href="https://arxiv.org/format/2409.13945">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> </div> </div> <p class="title is-5 mathjax"> PureDiffusion: Using Backdoor to Counter Backdoor in Generative Diffusion Models </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Truong%2C+V+T">Vu Tuan Truong</a>, <a href="/search/cs?searchtype=author&amp;query=Le%2C+L+B">Long Bao Le</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2409.13945v1-abstract-short" style="display: inline;"> Diffusion models (DMs) are advanced deep learning models that achieved state-of-the-art capability on a wide range of generative tasks. However, recent studies have shown their vulnerability regarding backdoor attacks, in which backdoored DMs consistently generate a designated result (e.g., a harmful image) called backdoor target when the models&#39; input contains a backdoor trigger. Although various&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2409.13945v1-abstract-full').style.display = 'inline'; document.getElementById('2409.13945v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2409.13945v1-abstract-full" style="display: none;"> Diffusion models (DMs) are advanced deep learning models that achieved state-of-the-art capability on a wide range of generative tasks. However, recent studies have shown their vulnerability regarding backdoor attacks, in which backdoored DMs consistently generate a designated result (e.g., a harmful image) called backdoor target when the models&#39; input contains a backdoor trigger. Although various backdoor techniques have been investigated to attack DMs, defense methods against these threats are still limited and underexplored, especially in inverting the backdoor trigger. In this paper, we introduce PureDiffusion, a novel backdoor defense framework that can efficiently detect backdoor attacks by inverting backdoor triggers embedded in DMs. Our extensive experiments on various trigger-target pairs show that PureDiffusion outperforms existing defense methods with a large gap in terms of fidelity (i.e., how much the inverted trigger resembles the original trigger) and backdoor success rate (i.e., the rate that the inverted trigger leads to the corresponding backdoor target). Notably, in certain cases, backdoor triggers inverted by PureDiffusion even achieve higher attack success rate than the original triggers. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2409.13945v1-abstract-full').style.display = 'none'; document.getElementById('2409.13945v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 20 September, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> September 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2408.08425">arXiv:2408.08425</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2408.08425">pdf</a>, <a href="https://arxiv.org/format/2408.08425">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Robotics">cs.RO</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computational Engineering, Finance, and Science">cs.CE</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Systems and Control">eess.SY</span> </div> <div class="is-inline-block" style="margin-left: 0.5rem"> <div class="tags has-addons"> <span class="tag is-dark is-size-7">doi</span> <span class="tag is-light is-size-7"><a class="" href="https://doi.org/10.1109/ICMLA58977.2023.00065">10.1109/ICMLA58977.2023.00065 <i class="fa fa-external-link" aria-hidden="true"></i></a></span> </div> </div> </div> <p class="title is-5 mathjax"> Physics-Guided Reinforcement Learning System for Realistic Vehicle Active Suspension Control </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Nhu%2C+A+N">Anh N. Nhu</a>, <a href="/search/cs?searchtype=author&amp;query=Le%2C+N">Ngoc-Anh Le</a>, <a href="/search/cs?searchtype=author&amp;query=Li%2C+S">Shihang Li</a>, <a href="/search/cs?searchtype=author&amp;query=Truong%2C+T+D+V">Thang D. V. Truong</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2408.08425v1-abstract-short" style="display: inline;"> The suspension system is a crucial part of the automotive chassis, improving vehicle ride comfort and isolating passengers from rough road excitation. Unlike passive suspension, which has constant spring and damping coefficients, active suspension incorporates electronic actuators into the system to dynamically control stiffness and damping variables. However, effectively controlling the suspensio&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2408.08425v1-abstract-full').style.display = 'inline'; document.getElementById('2408.08425v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2408.08425v1-abstract-full" style="display: none;"> The suspension system is a crucial part of the automotive chassis, improving vehicle ride comfort and isolating passengers from rough road excitation. Unlike passive suspension, which has constant spring and damping coefficients, active suspension incorporates electronic actuators into the system to dynamically control stiffness and damping variables. However, effectively controlling the suspension system poses a challenging task that necessitates real-time adaptability to various road conditions. This paper presents the Physics-Guided Deep Reinforcement Learning (DRL) for adjusting an active suspension system&#39;s variable kinematics and compliance properties for a quarter-car model in real time. Specifically, the outputs of the model are defined as actuator stiffness and damping control, which are bound within physically realistic ranges to maintain the system&#39;s physical compliance. The proposed model was trained on stochastic road profiles according to ISO 8608 standards to optimize the actuator&#39;s control policy. According to qualitative results on simulations, the vehicle body reacts smoothly to various novel real-world road conditions, having a much lower degree of oscillation. These observations mean a higher level of passenger comfort and better vehicle stability. Quantitatively, DRL outperforms passive systems in reducing the average vehicle body velocity and acceleration by 43.58% and 17.22%, respectively, minimizing the vertical movement impacts on the passengers. The code is publicly available at github.com/anh-nn01/RL4Suspension-ICMLA23. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2408.08425v1-abstract-full').style.display = 'none'; document.getElementById('2408.08425v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 15 August, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> August 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">\c{opyright} 2024 IEEE. Personal use of this material is permitted. Permission from IEEE must be obtained for all other uses, in any current or future media, including reprinting/republishing this material for advertising or promotional purposes, creating new collective works, for resale or redistribution to servers or lists, or reuse of any copyrighted component of this work in other works</span> </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Journal ref:</span> 2023 International Conference on Machine Learning and Applications (ICMLA), pp. 422-429 </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2408.06075">arXiv:2408.06075</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2408.06075">pdf</a>, <a href="https://arxiv.org/format/2408.06075">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Image and Video Processing">eess.IV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> <div class="is-inline-block" style="margin-left: 0.5rem"> <div class="tags has-addons"> <span class="tag is-dark is-size-7">doi</span> <span class="tag is-light is-size-7"><a class="" href="https://doi.org/10.1007/978-3-031-72744-3_15">10.1007/978-3-031-72744-3_15 <i class="fa fa-external-link" aria-hidden="true"></i></a></span> </div> </div> </div> <p class="title is-5 mathjax"> Five Pitfalls When Assessing Synthetic Medical Images with Reference Metrics </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Dohmen%2C+M">Melanie Dohmen</a>, <a href="/search/cs?searchtype=author&amp;query=Truong%2C+T">Tuan Truong</a>, <a href="/search/cs?searchtype=author&amp;query=Baltruschat%2C+I+M">Ivo M. Baltruschat</a>, <a href="/search/cs?searchtype=author&amp;query=Lenga%2C+M">Matthias Lenga</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2408.06075v2-abstract-short" style="display: inline;"> Reference metrics have been developed to objectively and quantitatively compare two images. Especially for evaluating the quality of reconstructed or compressed images, these metrics have shown very useful. Extensive tests of such metrics on benchmarks of artificially distorted natural images have revealed which metric best correlate with human perception of quality. Direct transfer of these metri&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2408.06075v2-abstract-full').style.display = 'inline'; document.getElementById('2408.06075v2-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2408.06075v2-abstract-full" style="display: none;"> Reference metrics have been developed to objectively and quantitatively compare two images. Especially for evaluating the quality of reconstructed or compressed images, these metrics have shown very useful. Extensive tests of such metrics on benchmarks of artificially distorted natural images have revealed which metric best correlate with human perception of quality. Direct transfer of these metrics to the evaluation of generative models in medical imaging, however, can easily lead to pitfalls, because assumptions about image content, image data format and image interpretation are often very different. Also, the correlation of reference metrics and human perception of quality can vary strongly for different kinds of distortions and commonly used metrics, such as SSIM, PSNR and MAE are not the best choice for all situations. We selected five pitfalls that showcase unexpected and probably undesired reference metric scores and discuss strategies to avoid them. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2408.06075v2-abstract-full').style.display = 'none'; document.getElementById('2408.06075v2-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 24 October, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 12 August, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> August 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">10 pages, 5 figures, presented at Deep Generative Models workshop @ MICCAI 2024</span> </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Journal ref:</span> In: Mukhopadhyay, A., Oksuz, I., Engelhardt, S., Mehrof, D., Yuan, Y. (eds) Deep Generative Models. DGM4MICCAI 2024. Lecture Notes in Computer Science, vol 15224. Springer, Cham </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2408.03400">arXiv:2408.03400</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2408.03400">pdf</a>, <a href="https://arxiv.org/format/2408.03400">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Cryptography and Security">cs.CR</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> Attacks and Defenses for Generative Diffusion Models: A Comprehensive Survey </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Truong%2C+V+T">Vu Tuan Truong</a>, <a href="/search/cs?searchtype=author&amp;query=Dang%2C+L+B">Luan Ba Dang</a>, <a href="/search/cs?searchtype=author&amp;query=Le%2C+L+B">Long Bao Le</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2408.03400v1-abstract-short" style="display: inline;"> Diffusion models (DMs) have achieved state-of-the-art performance on various generative tasks such as image synthesis, text-to-image, and text-guided image-to-image generation. However, the more powerful the DMs, the more harmful they potentially are. Recent studies have shown that DMs are prone to a wide range of attacks, including adversarial attacks, membership inference, backdoor injection, an&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2408.03400v1-abstract-full').style.display = 'inline'; document.getElementById('2408.03400v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2408.03400v1-abstract-full" style="display: none;"> Diffusion models (DMs) have achieved state-of-the-art performance on various generative tasks such as image synthesis, text-to-image, and text-guided image-to-image generation. However, the more powerful the DMs, the more harmful they potentially are. Recent studies have shown that DMs are prone to a wide range of attacks, including adversarial attacks, membership inference, backdoor injection, and various multi-modal threats. Since numerous pre-trained DMs are published widely on the Internet, potential threats from these attacks are especially detrimental to the society, making DM-related security a worth investigating topic. Therefore, in this paper, we conduct a comprehensive survey on the security aspect of DMs, focusing on various attack and defense methods for DMs. First, we present crucial knowledge of DMs with five main types of DMs, including denoising diffusion probabilistic models, denoising diffusion implicit models, noise conditioned score networks, stochastic differential equations, and multi-modal conditional DMs. We further survey a variety of recent studies investigating different types of attacks that exploit the vulnerabilities of DMs. Then, we thoroughly review potential countermeasures to mitigate each of the presented threats. Finally, we discuss open challenges of DM-related security and envision certain research directions for this topic. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2408.03400v1-abstract-full').style.display = 'none'; document.getElementById('2408.03400v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 6 August, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> August 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2408.01452">arXiv:2408.01452</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2408.01452">pdf</a>, <a href="https://arxiv.org/format/2408.01452">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computers and Society">cs.CY</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> Building a Domain-specific Guardrail Model in Production </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Niknazar%2C+M">Mohammad Niknazar</a>, <a href="/search/cs?searchtype=author&amp;query=Haley%2C+P+V">Paul V Haley</a>, <a href="/search/cs?searchtype=author&amp;query=Ramanan%2C+L">Latha Ramanan</a>, <a href="/search/cs?searchtype=author&amp;query=Truong%2C+S+T">Sang T. Truong</a>, <a href="/search/cs?searchtype=author&amp;query=Shrinivasan%2C+Y">Yedendra Shrinivasan</a>, <a href="/search/cs?searchtype=author&amp;query=Bhowmick%2C+A+K">Ayan Kumar Bhowmick</a>, <a href="/search/cs?searchtype=author&amp;query=Dey%2C+P">Prasenjit Dey</a>, <a href="/search/cs?searchtype=author&amp;query=Jagmohan%2C+A">Ashish Jagmohan</a>, <a href="/search/cs?searchtype=author&amp;query=Maheshwari%2C+H">Hema Maheshwari</a>, <a href="/search/cs?searchtype=author&amp;query=Ponoth%2C+S">Shom Ponoth</a>, <a href="/search/cs?searchtype=author&amp;query=Smith%2C+R">Robert Smith</a>, <a href="/search/cs?searchtype=author&amp;query=Vempaty%2C+A">Aditya Vempaty</a>, <a href="/search/cs?searchtype=author&amp;query=Haber%2C+N">Nick Haber</a>, <a href="/search/cs?searchtype=author&amp;query=Koyejo%2C+S">Sanmi Koyejo</a>, <a href="/search/cs?searchtype=author&amp;query=Sundararajan%2C+S">Sharad Sundararajan</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2408.01452v1-abstract-short" style="display: inline;"> Generative AI holds the promise of enabling a range of sought-after capabilities and revolutionizing workflows in various consumer and enterprise verticals. However, putting a model in production involves much more than just generating an output. It involves ensuring the model is reliable, safe, performant and also adheres to the policy of operation in a particular domain. Guardrails as a necessit&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2408.01452v1-abstract-full').style.display = 'inline'; document.getElementById('2408.01452v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2408.01452v1-abstract-full" style="display: none;"> Generative AI holds the promise of enabling a range of sought-after capabilities and revolutionizing workflows in various consumer and enterprise verticals. However, putting a model in production involves much more than just generating an output. It involves ensuring the model is reliable, safe, performant and also adheres to the policy of operation in a particular domain. Guardrails as a necessity for models has evolved around the need to enforce appropriate behavior of models, especially when they are in production. In this paper, we use education as a use case, given its stringent requirements of the appropriateness of content in the domain, to demonstrate how a guardrail model can be trained and deployed in production. Specifically, we describe our experience in building a production-grade guardrail model for a K-12 educational platform. We begin by formulating the requirements for deployment to this sensitive domain. We then describe the training and benchmarking of our domain-specific guardrail model, which outperforms competing open- and closed- instruction-tuned models of similar and larger size, on proprietary education-related benchmarks and public benchmarks related to general aspects of safety. Finally, we detail the choices we made on architecture and the optimizations for deploying this service in production; these range across the stack from the hardware infrastructure to the serving layer to language model inference optimizations. We hope this paper will be instructive to other practitioners looking to create production-grade domain-specific services based on generative AI and large language models. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2408.01452v1-abstract-full').style.display = 'none'; document.getElementById('2408.01452v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 24 July, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> August 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2407.04631">arXiv:2407.04631</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2407.04631">pdf</a>, <a href="https://arxiv.org/format/2407.04631">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Materials Science">cond-mat.mtrl-sci</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> An autoencoder for compressing angle-resolved photoemission spectroscopy data </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Agustsson%2C+S+Y">Steinn Ymir Agustsson</a>, <a href="/search/cs?searchtype=author&amp;query=Haque%2C+M+A">Mohammad Ahsanul Haque</a>, <a href="/search/cs?searchtype=author&amp;query=Truong%2C+T+T">Thi Tam Truong</a>, <a href="/search/cs?searchtype=author&amp;query=Bianchi%2C+M">Marco Bianchi</a>, <a href="/search/cs?searchtype=author&amp;query=Klyuchnikov%2C+N">Nikita Klyuchnikov</a>, <a href="/search/cs?searchtype=author&amp;query=Mottin%2C+D">Davide Mottin</a>, <a href="/search/cs?searchtype=author&amp;query=Karras%2C+P">Panagiotis Karras</a>, <a href="/search/cs?searchtype=author&amp;query=Hofmann%2C+P">Philip Hofmann</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2407.04631v1-abstract-short" style="display: inline;"> Angle-resolved photoemission spectroscopy (ARPES) is a powerful experimental technique to determine the electronic structure of solids. Advances in light sources for ARPES experiments are currently leading to a vast increase of data acquisition rates and data quantity. On the other hand, access time to the most advanced ARPES instruments remains strictly limited, calling for fast, effective, and o&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2407.04631v1-abstract-full').style.display = 'inline'; document.getElementById('2407.04631v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2407.04631v1-abstract-full" style="display: none;"> Angle-resolved photoemission spectroscopy (ARPES) is a powerful experimental technique to determine the electronic structure of solids. Advances in light sources for ARPES experiments are currently leading to a vast increase of data acquisition rates and data quantity. On the other hand, access time to the most advanced ARPES instruments remains strictly limited, calling for fast, effective, and on-the-fly data analysis tools to exploit this time. In response to this need, we introduce ARPESNet, a versatile autoencoder network that efficiently summmarises and compresses ARPES datasets. We train ARPESNet on a large and varied dataset of 2-dimensional ARPES data extracted by cutting standard 3-dimensional ARPES datasets along random directions in $\mathbf{k}$. To test the data representation capacity of ARPESNet, we compare $k$-means clustering quality between data compressed by ARPESNet, data compressed by discrete cosine transform, and raw data, at different noise levels. ARPESNet data excels in clustering quality despite its high compression ratio. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2407.04631v1-abstract-full').style.display = 'none'; document.getElementById('2407.04631v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 5 July, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> July 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2407.01734">arXiv:2407.01734</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2407.01734">pdf</a>, <a href="https://arxiv.org/format/2407.01734">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Quantum Physics">quant-ph</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> </div> </div> <p class="title is-5 mathjax"> Universal Quantum Tomography With Deep Neural Networks </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Luu%2C+N+T">Nhan T. Luu</a>, <a href="/search/cs?searchtype=author&amp;query=Truong%2C+T+C">Thang C. Truong</a>, <a href="/search/cs?searchtype=author&amp;query=Luu%2C+D+T">Duong T. Luu</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2407.01734v3-abstract-short" style="display: inline;"> Quantum state tomography is a crucial technique for characterizing the state of a quantum system, which is essential for many applications in quantum technologies. In recent years, there has been growing interest in leveraging neural networks to enhance the efficiency and accuracy of quantum state tomography. Still, many of them did not include mixed quantum state, since pure states are arguably l&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2407.01734v3-abstract-full').style.display = 'inline'; document.getElementById('2407.01734v3-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2407.01734v3-abstract-full" style="display: none;"> Quantum state tomography is a crucial technique for characterizing the state of a quantum system, which is essential for many applications in quantum technologies. In recent years, there has been growing interest in leveraging neural networks to enhance the efficiency and accuracy of quantum state tomography. Still, many of them did not include mixed quantum state, since pure states are arguably less common in practical situations. In this research paper, we present two neural networks based approach for both pure and mixed quantum state tomography: Restricted Feature Based Neural Network and Mixed States Conditional Generative Adversarial Network, evaluate its effectiveness in comparison to existing neural based methods. We demonstrate that our proposed methods can achieve state-of-the-art results in reconstructing mixed quantum states from experimental data. Our work highlights the potential of neural networks in revolutionizing quantum state tomography and facilitating the development of quantum technologies. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2407.01734v3-abstract-full').style.display = 'none'; document.getElementById('2407.01734v3-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 8 September, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 1 July, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> July 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">10 pages, 5 figures, 17 illustration, 1 table</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2406.18602">arXiv:2406.18602</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2406.18602">pdf</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Applications">stat.AP</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computation">stat.CO</span> </div> </div> <p class="title is-5 mathjax"> Multi-level Phenotypic Models of Cardiovascular Disease and Obstructive Sleep Apnea Comorbidities: A Longitudinal Wisconsin Sleep Cohort Study </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Nguyen%2C+D">Duy Nguyen</a>, <a href="/search/cs?searchtype=author&amp;query=Hoang%2C+C">Ca Hoang</a>, <a href="/search/cs?searchtype=author&amp;query=Huynh%2C+P+K">Phat K. Huynh</a>, <a href="/search/cs?searchtype=author&amp;query=Truong%2C+T">Tien Truong</a>, <a href="/search/cs?searchtype=author&amp;query=Nguyen%2C+D">Dang Nguyen</a>, <a href="/search/cs?searchtype=author&amp;query=Sharma%2C+A">Abhay Sharma</a>, <a href="/search/cs?searchtype=author&amp;query=Le%2C+T+Q">Trung Q. Le</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2406.18602v1-abstract-short" style="display: inline;"> Cardiovascular diseases (CVDs) are notably prevalent among patients with obstructive sleep apnea (OSA), posing unique challenges in predicting CVD progression due to the intricate interactions of comorbidities. Traditional models typically lack the necessary dynamic and longitudinal scope to accurately forecast CVD trajectories in OSA patients. This study introduces a novel multi-level phenotypic&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2406.18602v1-abstract-full').style.display = 'inline'; document.getElementById('2406.18602v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2406.18602v1-abstract-full" style="display: none;"> Cardiovascular diseases (CVDs) are notably prevalent among patients with obstructive sleep apnea (OSA), posing unique challenges in predicting CVD progression due to the intricate interactions of comorbidities. Traditional models typically lack the necessary dynamic and longitudinal scope to accurately forecast CVD trajectories in OSA patients. This study introduces a novel multi-level phenotypic model to analyze the progression and interplay of these conditions over time, utilizing data from the Wisconsin Sleep Cohort, which includes 1,123 participants followed for decades. Our methodology comprises three advanced steps: (1) Conducting feature importance analysis through tree-based models to underscore critical predictive variables like total cholesterol, low-density lipoprotein (LDL), and diabetes. (2) Developing a logistic mixed-effects model (LGMM) to track longitudinal transitions and pinpoint significant factors, which displayed a diagnostic accuracy of 0.9556. (3) Implementing t-distributed Stochastic Neighbor Embedding (t-SNE) alongside Gaussian Mixture Models (GMM) to segment patient data into distinct phenotypic clusters that reflect varied risk profiles and disease progression pathways. This phenotypic clustering revealed two main groups, with one showing a markedly increased risk of major adverse cardiovascular events (MACEs), underscored by the significant predictive role of nocturnal hypoxia and sympathetic nervous system activity from sleep data. Analysis of transitions and trajectories with t-SNE and GMM highlighted different progression rates within the cohort, with one cluster progressing more slowly towards severe CVD states than the other. This study offers a comprehensive understanding of the dynamic relationship between CVD and OSA, providing valuable tools for predicting disease onset and tailoring treatment approaches. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2406.18602v1-abstract-full').style.display = 'none'; document.getElementById('2406.18602v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 19 June, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> June 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">30 pages, 5 figure, 5 tables</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2406.07107">arXiv:2406.07107</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2406.07107">pdf</a>, <a href="https://arxiv.org/format/2406.07107">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> Agnostic Sharpness-Aware Minimization </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Nguyen%2C+V">Van-Anh Nguyen</a>, <a href="/search/cs?searchtype=author&amp;query=Tran%2C+Q">Quyen Tran</a>, <a href="/search/cs?searchtype=author&amp;query=Truong%2C+T">Tuan Truong</a>, <a href="/search/cs?searchtype=author&amp;query=Do%2C+T">Thanh-Toan Do</a>, <a href="/search/cs?searchtype=author&amp;query=Phung%2C+D">Dinh Phung</a>, <a href="/search/cs?searchtype=author&amp;query=Le%2C+T">Trung Le</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2406.07107v3-abstract-short" style="display: inline;"> Sharpness-aware minimization (SAM) has been instrumental in improving deep neural network training by minimizing both the training loss and the sharpness of the loss landscape, leading the model into flatter minima that are associated with better generalization properties. In another aspect, Model-Agnostic Meta-Learning (MAML) is a framework designed to improve the adaptability of models. MAML opt&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2406.07107v3-abstract-full').style.display = 'inline'; document.getElementById('2406.07107v3-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2406.07107v3-abstract-full" style="display: none;"> Sharpness-aware minimization (SAM) has been instrumental in improving deep neural network training by minimizing both the training loss and the sharpness of the loss landscape, leading the model into flatter minima that are associated with better generalization properties. In another aspect, Model-Agnostic Meta-Learning (MAML) is a framework designed to improve the adaptability of models. MAML optimizes a set of meta-models that are specifically tailored for quick adaptation to multiple tasks with minimal fine-tuning steps and can generalize well with limited data. In this work, we explore the connection between SAM and MAML in enhancing model generalization. We introduce Agnostic-SAM, a novel approach that combines the principles of both SAM and MAML. Agnostic-SAM adapts the core idea of SAM by optimizing the model toward wider local minima using training data, while concurrently maintaining low loss values on validation data. By doing so, it seeks flatter minima that are not only robust to small perturbations but also less vulnerable to data distributional shift problems. Our experimental results demonstrate that Agnostic-SAM significantly improves generalization over baselines across a range of datasets and under challenging conditions such as noisy labels or data limitation. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2406.07107v3-abstract-full').style.display = 'none'; document.getElementById('2406.07107v3-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 2 October, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 11 June, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> June 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Under review</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2406.01432">arXiv:2406.01432</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2406.01432">pdf</a>, <a href="https://arxiv.org/format/2406.01432">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> ED-SAM: An Efficient Diffusion Sampling Approach to Domain Generalization in Vision-Language Foundation Models </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Truong%2C+T">Thanh-Dat Truong</a>, <a href="/search/cs?searchtype=author&amp;query=Li%2C+X">Xin Li</a>, <a href="/search/cs?searchtype=author&amp;query=Raj%2C+B">Bhiksha Raj</a>, <a href="/search/cs?searchtype=author&amp;query=Cothren%2C+J">Jackson Cothren</a>, <a href="/search/cs?searchtype=author&amp;query=Luu%2C+K">Khoa Luu</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2406.01432v1-abstract-short" style="display: inline;"> The Vision-Language Foundation Model has recently shown outstanding performance in various perception learning tasks. The outstanding performance of the vision-language model mainly relies on large-scale pre-training datasets and different data augmentation techniques. However, the domain generalization problem of the vision-language foundation model needs to be addressed. This problem has limited&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2406.01432v1-abstract-full').style.display = 'inline'; document.getElementById('2406.01432v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2406.01432v1-abstract-full" style="display: none;"> The Vision-Language Foundation Model has recently shown outstanding performance in various perception learning tasks. The outstanding performance of the vision-language model mainly relies on large-scale pre-training datasets and different data augmentation techniques. However, the domain generalization problem of the vision-language foundation model needs to be addressed. This problem has limited the generalizability of the vision-language foundation model to unknown data distributions. In this paper, we introduce a new simple but efficient Diffusion Sampling approach to Domain Generalization (ED-SAM) to improve the generalizability of the vision-language foundation model. Our theoretical analysis in this work reveals the critical role and relation of the diffusion model to domain generalization in the vision-language foundation model. Then, based on the insightful analysis, we introduce a new simple yet effective Transport Transformation to diffusion sampling method. It can effectively generate adversarial samples to improve the generalizability of the foundation model against unknown data distributions. The experimental results on different scales of vision-language pre-training datasets, including CC3M, CC12M, and LAION400M, have consistently shown State-of-the-Art performance and scalability of the proposed ED-SAM approach compared to the other recent methods. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2406.01432v1-abstract-full').style.display = 'none'; document.getElementById('2406.01432v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 3 June, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> June 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2406.01429">arXiv:2406.01429</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2406.01429">pdf</a>, <a href="https://arxiv.org/format/2406.01429">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> EAGLE: Efficient Adaptive Geometry-based Learning in Cross-view Understanding </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Truong%2C+T">Thanh-Dat Truong</a>, <a href="/search/cs?searchtype=author&amp;query=Prabhu%2C+U">Utsav Prabhu</a>, <a href="/search/cs?searchtype=author&amp;query=Wang%2C+D">Dongyi Wang</a>, <a href="/search/cs?searchtype=author&amp;query=Raj%2C+B">Bhiksha Raj</a>, <a href="/search/cs?searchtype=author&amp;query=Gauch%2C+S">Susan Gauch</a>, <a href="/search/cs?searchtype=author&amp;query=Subbiah%2C+J">Jeyamkondan Subbiah</a>, <a href="/search/cs?searchtype=author&amp;query=Luu%2C+K">Khoa Luu</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2406.01429v2-abstract-short" style="display: inline;"> Unsupervised Domain Adaptation has been an efficient approach to transferring the semantic segmentation model across data distributions. Meanwhile, the recent Open-vocabulary Semantic Scene understanding based on large-scale vision language models is effective in open-set settings because it can learn diverse concepts and categories. However, these prior methods fail to generalize across different&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2406.01429v2-abstract-full').style.display = 'inline'; document.getElementById('2406.01429v2-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2406.01429v2-abstract-full" style="display: none;"> Unsupervised Domain Adaptation has been an efficient approach to transferring the semantic segmentation model across data distributions. Meanwhile, the recent Open-vocabulary Semantic Scene understanding based on large-scale vision language models is effective in open-set settings because it can learn diverse concepts and categories. However, these prior methods fail to generalize across different camera views due to the lack of cross-view geometric modeling. At present, there are limited studies analyzing cross-view learning. To address this problem, we introduce a novel Unsupervised Cross-view Adaptation Learning approach to modeling the geometric structural change across views in Semantic Scene Understanding. First, we introduce a novel Cross-view Geometric Constraint on Unpaired Data to model structural changes in images and segmentation masks across cameras. Second, we present a new Geodesic Flow-based Correlation Metric to efficiently measure the geometric structural changes across camera views. Third, we introduce a novel view-condition prompting mechanism to enhance the view-information modeling of the open-vocabulary segmentation network in cross-view adaptation learning. The experiments on different cross-view adaptation benchmarks have shown the effectiveness of our approach in cross-view modeling, demonstrating that we achieve State-of-the-Art (SOTA) performance compared to prior unsupervised domain adaptation and open-vocabulary semantic segmentation methods. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2406.01429v2-abstract-full').style.display = 'none'; document.getElementById('2406.01429v2-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 11 October, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 3 June, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> June 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Accepted to NeurIPS&#39;24</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2406.00960">arXiv:2406.00960</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2406.00960">pdf</a>, <a href="https://arxiv.org/format/2406.00960">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Graphics">cs.GR</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Robotics">cs.RO</span> </div> </div> <p class="title is-5 mathjax"> PDP: Physics-Based Character Animation via Diffusion Policy </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Truong%2C+T+E">Takara E. Truong</a>, <a href="/search/cs?searchtype=author&amp;query=Piseno%2C+M">Michael Piseno</a>, <a href="/search/cs?searchtype=author&amp;query=Xie%2C+Z">Zhaoming Xie</a>, <a href="/search/cs?searchtype=author&amp;query=Liu%2C+C+K">C. Karen Liu</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2406.00960v1-abstract-short" style="display: inline;"> Generating diverse and realistic human motion that can physically interact with an environment remains a challenging research area in character animation. Meanwhile, diffusion-based methods, as proposed by the robotics community, have demonstrated the ability to capture highly diverse and multi-modal skills. However, naively training a diffusion policy often results in unstable motions for high-fr&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2406.00960v1-abstract-full').style.display = 'inline'; document.getElementById('2406.00960v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2406.00960v1-abstract-full" style="display: none;"> Generating diverse and realistic human motion that can physically interact with an environment remains a challenging research area in character animation. Meanwhile, diffusion-based methods, as proposed by the robotics community, have demonstrated the ability to capture highly diverse and multi-modal skills. However, naively training a diffusion policy often results in unstable motions for high-frequency, under-actuated control tasks like bipedal locomotion due to rapidly accumulating compounding errors, pushing the agent away from optimal training trajectories. The key idea lies in using RL policies not just for providing optimal trajectories but for providing corrective actions in sub-optimal states, giving the policy a chance to correct for errors caused by environmental stimulus, model errors, or numerical errors in simulation. Our method, Physics-Based Character Animation via Diffusion Policy (PDP), combines reinforcement learning (RL) and behavior cloning (BC) to create a robust diffusion policy for physics-based character animation. We demonstrate PDP on perturbation recovery, universal motion tracking, and physics-based text-to-motion synthesis. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2406.00960v1-abstract-full').style.display = 'none'; document.getElementById('2406.00960v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 2 June, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> June 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2405.09334">arXiv:2405.09334</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2405.09334">pdf</a>, <a href="https://arxiv.org/format/2405.09334">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Information Retrieval">cs.IR</span> </div> </div> <p class="title is-5 mathjax"> Content-Based Image Retrieval for Multi-Class Volumetric Radiology Images: A Benchmark Study </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Jush%2C+F+K">Farnaz Khun Jush</a>, <a href="/search/cs?searchtype=author&amp;query=Vogler%2C+S">Steffen Vogler</a>, <a href="/search/cs?searchtype=author&amp;query=Truong%2C+T">Tuan Truong</a>, <a href="/search/cs?searchtype=author&amp;query=Lenga%2C+M">Matthias Lenga</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2405.09334v2-abstract-short" style="display: inline;"> While content-based image retrieval (CBIR) has been extensively studied in natural image retrieval, its application to medical images presents ongoing challenges, primarily due to the 3D nature of medical images. Recent studies have shown the potential use of pre-trained vision embeddings for CBIR in the context of radiology image retrieval. However, a benchmark for the retrieval of 3D volumetric&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2405.09334v2-abstract-full').style.display = 'inline'; document.getElementById('2405.09334v2-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2405.09334v2-abstract-full" style="display: none;"> While content-based image retrieval (CBIR) has been extensively studied in natural image retrieval, its application to medical images presents ongoing challenges, primarily due to the 3D nature of medical images. Recent studies have shown the potential use of pre-trained vision embeddings for CBIR in the context of radiology image retrieval. However, a benchmark for the retrieval of 3D volumetric medical images is still lacking, hindering the ability to objectively evaluate and compare the efficiency of proposed CBIR approaches in medical imaging. In this study, we extend previous work and establish a benchmark for region-based and localized multi-organ retrieval using the TotalSegmentator dataset (TS) with detailed multi-organ annotations. We benchmark embeddings derived from pre-trained supervised models on medical images against embeddings derived from pre-trained unsupervised models on non-medical images for 29 coarse and 104 detailed anatomical structures in volume and region levels. For volumetric image retrieval, we adopt a late interaction re-ranking method inspired by text matching. We compare it against the original method proposed for volume and region retrieval and achieve a retrieval recall of 1.0 for diverse anatomical regions with a wide size range. The findings and methodologies presented in this paper provide insights and benchmarks for further development and evaluation of CBIR approaches in the context of medical imaging. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2405.09334v2-abstract-full').style.display = 'none'; document.getElementById('2405.09334v2-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 4 July, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 15 May, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> May 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">34 pages, 12 Figures, 22 Tables</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2405.08431">arXiv:2405.08431</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2405.08431">pdf</a>, <a href="https://arxiv.org/format/2405.08431">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Image and Video Processing">eess.IV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> Similarity and Quality Metrics for MR Image-To-Image Translation </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Dohmen%2C+M">Melanie Dohmen</a>, <a href="/search/cs?searchtype=author&amp;query=Klemens%2C+M">Mark Klemens</a>, <a href="/search/cs?searchtype=author&amp;query=Baltruschat%2C+I">Ivo Baltruschat</a>, <a href="/search/cs?searchtype=author&amp;query=Truong%2C+T">Tuan Truong</a>, <a href="/search/cs?searchtype=author&amp;query=Lenga%2C+M">Matthias Lenga</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2405.08431v4-abstract-short" style="display: inline;"> Image-to-image translation can create large impact in medical imaging, as images can be synthetically transformed to other modalities, sequence types, higher resolutions or lower noise levels. To ensure patient safety, these methods should be validated by human readers, which requires a considerable amount of time and costs. Quantitative metrics can effectively complement such studies and provide&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2405.08431v4-abstract-full').style.display = 'inline'; document.getElementById('2405.08431v4-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2405.08431v4-abstract-full" style="display: none;"> Image-to-image translation can create large impact in medical imaging, as images can be synthetically transformed to other modalities, sequence types, higher resolutions or lower noise levels. To ensure patient safety, these methods should be validated by human readers, which requires a considerable amount of time and costs. Quantitative metrics can effectively complement such studies and provide reproducible and objective assessment of synthetic images. If a reference is available, the similarity of MR images is frequently evaluated by SSIM and PSNR metrics, even though these metrics are not or too sensitive regarding specific distortions. When reference images to compare with are not available, non-reference quality metrics can reliably detect specific distortions, such as blurriness. To provide an overview on distortion sensitivity, we quantitatively analyze 11 similarity (reference) and 12 quality (non-reference) metrics for assessing synthetic images. We additionally include a metric on a downstream segmentation task. We investigate the sensitivity regarding 11 kinds of distortions and typical MR artifacts, and analyze the influence of different normalization methods on each metric and distortion. Finally, we derive recommendations for effective usage of the analyzed similarity and quality metrics for evaluation of image-to-image translation models. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2405.08431v4-abstract-full').style.display = 'none'; document.getElementById('2405.08431v4-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 18 October, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 14 May, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> May 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">21 pages, 8 figures, supplement with 16 pages, 10 figures, submitted to Nature Scientific Reports</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2405.01337">arXiv:2405.01337</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2405.01337">pdf</a>, <a href="https://arxiv.org/format/2405.01337">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> Multi-view Action Recognition via Directed Gromov-Wasserstein Discrepancy </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Nguyen%2C+H">Hoang-Quan Nguyen</a>, <a href="/search/cs?searchtype=author&amp;query=Truong%2C+T">Thanh-Dat Truong</a>, <a href="/search/cs?searchtype=author&amp;query=Luu%2C+K">Khoa Luu</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2405.01337v1-abstract-short" style="display: inline;"> Action recognition has become one of the popular research topics in computer vision. There are various methods based on Convolutional Networks and self-attention mechanisms as Transformers to solve both spatial and temporal dimensions problems of action recognition tasks that achieve competitive performances. However, these methods lack a guarantee of the correctness of the action subject that the&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2405.01337v1-abstract-full').style.display = 'inline'; document.getElementById('2405.01337v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2405.01337v1-abstract-full" style="display: none;"> Action recognition has become one of the popular research topics in computer vision. There are various methods based on Convolutional Networks and self-attention mechanisms as Transformers to solve both spatial and temporal dimensions problems of action recognition tasks that achieve competitive performances. However, these methods lack a guarantee of the correctness of the action subject that the models give attention to, i.e., how to ensure an action recognition model focuses on the proper action subject to make a reasonable action prediction. In this paper, we propose a multi-view attention consistency method that computes the similarity between two attentions from two different views of the action videos using Directed Gromov-Wasserstein Discrepancy. Furthermore, our approach applies the idea of Neural Radiance Field to implicitly render the features from novel views when training on single-view datasets. Therefore, the contributions in this work are three-fold. Firstly, we introduce the multi-view attention consistency to solve the problem of reasonable prediction in action recognition. Secondly, we define a new metric for multi-view consistent attention using Directed Gromov-Wasserstein Discrepancy. Thirdly, we built an action recognition model based on Video Transformers and Neural Radiance Fields. Compared to the recent action recognition methods, the proposed approach achieves state-of-the-art results on three large-scale datasets, i.e., Jester, Something-Something V2, and Kinetics-400. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2405.01337v1-abstract-full').style.display = 'none'; document.getElementById('2405.01337v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 2 May, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> May 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2405.00681">arXiv:2405.00681</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2405.00681">pdf</a>, <a href="https://arxiv.org/format/2405.00681">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Signal Processing">eess.SP</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Information Theory">cs.IT</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Networking and Internet Architecture">cs.NI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Systems and Control">eess.SY</span> </div> </div> <p class="title is-5 mathjax"> Delay and Overhead Efficient Transmission Scheduling for Federated Learning in UAV Swarms </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Hoang%2C+D+N+M">Duc N. M. Hoang</a>, <a href="/search/cs?searchtype=author&amp;query=Truong%2C+V+T">Vu Tuan Truong</a>, <a href="/search/cs?searchtype=author&amp;query=Le%2C+H+D">Hung Duy Le</a>, <a href="/search/cs?searchtype=author&amp;query=Le%2C+L+B">Long Bao Le</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2405.00681v1-abstract-short" style="display: inline;"> This paper studies the wireless scheduling design to coordinate the transmissions of (local) model parameters of federated learning (FL) for a swarm of unmanned aerial vehicles (UAVs). The overall goal of the proposed design is to realize the FL training and aggregation processes with a central aggregator exploiting the sensory data collected by the UAVs but it considers the multi-hop wireless net&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2405.00681v1-abstract-full').style.display = 'inline'; document.getElementById('2405.00681v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2405.00681v1-abstract-full" style="display: none;"> This paper studies the wireless scheduling design to coordinate the transmissions of (local) model parameters of federated learning (FL) for a swarm of unmanned aerial vehicles (UAVs). The overall goal of the proposed design is to realize the FL training and aggregation processes with a central aggregator exploiting the sensory data collected by the UAVs but it considers the multi-hop wireless network formed by the UAVs. Such transmissions of model parameters over the UAV-based wireless network potentially cause large transmission delays and overhead. Our proposed framework smartly aggregates local model parameters trained by the UAVs while efficiently transmitting the underlying parameters to the central aggregator in each FL global round. We theoretically show that the proposed scheme achieves minimal delay and communication overhead. Extensive numerical experiments demonstrate the superiority of the proposed scheme compared to other baselines. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2405.00681v1-abstract-full').style.display = 'none'; document.getElementById('2405.00681v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 22 February, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> May 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">accepted to WCNC&#39;24</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2404.02421">arXiv:2404.02421</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2404.02421">pdf</a>, <a href="https://arxiv.org/format/2404.02421">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> </div> </div> <p class="title is-5 mathjax"> Revisiting subword tokenization: A case study on affixal negation in large language models </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Truong%2C+T+H">Thinh Hung Truong</a>, <a href="/search/cs?searchtype=author&amp;query=Otmakhova%2C+Y">Yulia Otmakhova</a>, <a href="/search/cs?searchtype=author&amp;query=Verspoor%2C+K">Karin Verspoor</a>, <a href="/search/cs?searchtype=author&amp;query=Cohn%2C+T">Trevor Cohn</a>, <a href="/search/cs?searchtype=author&amp;query=Baldwin%2C+T">Timothy Baldwin</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2404.02421v2-abstract-short" style="display: inline;"> In this work, we measure the impact of affixal negation on modern English large language models (LLMs). In affixal negation, the negated meaning is expressed through a negative morpheme, which is potentially challenging for LLMs as their tokenizers are often not morphologically plausible. We conduct extensive experiments using LLMs with different subword tokenization methods, which lead to several&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2404.02421v2-abstract-full').style.display = 'inline'; document.getElementById('2404.02421v2-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2404.02421v2-abstract-full" style="display: none;"> In this work, we measure the impact of affixal negation on modern English large language models (LLMs). In affixal negation, the negated meaning is expressed through a negative morpheme, which is potentially challenging for LLMs as their tokenizers are often not morphologically plausible. We conduct extensive experiments using LLMs with different subword tokenization methods, which lead to several insights on the interaction between tokenization performance and negation sensitivity. Despite some interesting mismatches between tokenization accuracy and negation detection performance, we show that models can, on the whole, reliably recognize the meaning of affixal negation. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2404.02421v2-abstract-full').style.display = 'none'; document.getElementById('2404.02421v2-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 4 April, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 2 April, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> April 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">NAACL 2024</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2403.02715">arXiv:2403.02715</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2403.02715">pdf</a>, <a href="https://arxiv.org/format/2403.02715">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> </div> </div> <p class="title is-5 mathjax"> Crossing Linguistic Horizons: Finetuning and Comprehensive Evaluation of Vietnamese Large Language Models </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Truong%2C+S+T">Sang T. Truong</a>, <a href="/search/cs?searchtype=author&amp;query=Nguyen%2C+D+Q">Duc Q. Nguyen</a>, <a href="/search/cs?searchtype=author&amp;query=Nguyen%2C+T">Toan Nguyen</a>, <a href="/search/cs?searchtype=author&amp;query=Le%2C+D+D">Dong D. Le</a>, <a href="/search/cs?searchtype=author&amp;query=Truong%2C+N+N">Nhi N. Truong</a>, <a href="/search/cs?searchtype=author&amp;query=Quan%2C+T">Tho Quan</a>, <a href="/search/cs?searchtype=author&amp;query=Koyejo%2C+S">Sanmi Koyejo</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2403.02715v2-abstract-short" style="display: inline;"> Recent advancements in large language models (LLMs) have underscored their importance in the evolution of artificial intelligence. However, despite extensive pretraining on multilingual datasets, available open-sourced LLMs exhibit limited effectiveness in processing Vietnamese. The challenge is exacerbated by the absence of systematic benchmark datasets and metrics tailored for Vietnamese LLM eva&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2403.02715v2-abstract-full').style.display = 'inline'; document.getElementById('2403.02715v2-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2403.02715v2-abstract-full" style="display: none;"> Recent advancements in large language models (LLMs) have underscored their importance in the evolution of artificial intelligence. However, despite extensive pretraining on multilingual datasets, available open-sourced LLMs exhibit limited effectiveness in processing Vietnamese. The challenge is exacerbated by the absence of systematic benchmark datasets and metrics tailored for Vietnamese LLM evaluation. To mitigate these issues, we have finetuned LLMs specifically for Vietnamese and developed a comprehensive evaluation framework encompassing 10 common tasks and 31 metrics. Our evaluation results reveal that the fine-tuned LLMs exhibit enhanced comprehension and generative capabilities in Vietnamese. Moreover, our analysis indicates that models with more parameters can introduce more biases and uncalibrated outputs and the key factor influencing LLM performance is the quality of the training or fine-tuning datasets. These insights underscore the significance of meticulous fine-tuning with high-quality datasets in enhancing LLM performance. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2403.02715v2-abstract-full').style.display = 'none'; document.getElementById('2403.02715v2-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 26 May, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 5 March, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> March 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">51 pages</span> </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">MSC Class:</span> 68T50 </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2401.06692">arXiv:2401.06692</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2401.06692">pdf</a>, <a href="https://arxiv.org/format/2401.06692">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> An Experimental Design Framework for Label-Efficient Supervised Finetuning of Large Language Models </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Bhatt%2C+G">Gantavya Bhatt</a>, <a href="/search/cs?searchtype=author&amp;query=Chen%2C+Y">Yifang Chen</a>, <a href="/search/cs?searchtype=author&amp;query=Das%2C+A+M">Arnav M. Das</a>, <a href="/search/cs?searchtype=author&amp;query=Zhang%2C+J">Jifan Zhang</a>, <a href="/search/cs?searchtype=author&amp;query=Truong%2C+S+T">Sang T. Truong</a>, <a href="/search/cs?searchtype=author&amp;query=Mussmann%2C+S">Stephen Mussmann</a>, <a href="/search/cs?searchtype=author&amp;query=Zhu%2C+Y">Yinglun Zhu</a>, <a href="/search/cs?searchtype=author&amp;query=Bilmes%2C+J">Jeffrey Bilmes</a>, <a href="/search/cs?searchtype=author&amp;query=Du%2C+S+S">Simon S. Du</a>, <a href="/search/cs?searchtype=author&amp;query=Jamieson%2C+K">Kevin Jamieson</a>, <a href="/search/cs?searchtype=author&amp;query=Ash%2C+J+T">Jordan T. Ash</a>, <a href="/search/cs?searchtype=author&amp;query=Nowak%2C+R+D">Robert D. Nowak</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2401.06692v3-abstract-short" style="display: inline;"> Supervised finetuning (SFT) on instruction datasets has played a crucial role in achieving the remarkable zero-shot generalization capabilities observed in modern large language models (LLMs). However, the annotation efforts required to produce high quality responses for instructions are becoming prohibitively expensive, especially as the number of tasks spanned by instruction datasets continues t&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2401.06692v3-abstract-full').style.display = 'inline'; document.getElementById('2401.06692v3-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2401.06692v3-abstract-full" style="display: none;"> Supervised finetuning (SFT) on instruction datasets has played a crucial role in achieving the remarkable zero-shot generalization capabilities observed in modern large language models (LLMs). However, the annotation efforts required to produce high quality responses for instructions are becoming prohibitively expensive, especially as the number of tasks spanned by instruction datasets continues to increase. Active learning is effective in identifying useful subsets of samples to annotate from an unlabeled pool, but its high computational cost remains a barrier to its widespread applicability in the context of LLMs. To mitigate the annotation cost of SFT and circumvent the computational bottlenecks of active learning, we propose using experimental design. Experimental design techniques select the most informative samples to label, and typically maximize some notion of uncertainty and/or diversity. In our work, we implement a framework that evaluates several existing and novel experimental design techniques and find that these methods consistently yield significant gains in label efficiency with little computational overhead. On generative tasks, our methods achieve the same generalization performance with only $50\%$ of annotation cost required by random sampling. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2401.06692v3-abstract-full').style.display = 'none'; document.getElementById('2401.06692v3-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 7 July, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 12 January, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> January 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Accepted to Findings of the Association for Computational Linguistics: ACL 2024</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2401.01393">arXiv:2401.01393</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2401.01393">pdf</a>, <a href="https://arxiv.org/format/2401.01393">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Optimization and Control">math.OC</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Complex Variables">math.CV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Dynamical Systems">math.DS</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Numerical Analysis">math.NA</span> </div> </div> <p class="title is-5 mathjax"> Backtracking New Q-Newton&#39;s method, Newton&#39;s flow, Voronoi&#39;s diagram and Stochastic root finding </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Fornaess%2C+J+E">John Erik Fornaess</a>, <a href="/search/cs?searchtype=author&amp;query=Hu%2C+M">Mi Hu</a>, <a href="/search/cs?searchtype=author&amp;query=Truong%2C+T+T">Tuyen Trung Truong</a>, <a href="/search/cs?searchtype=author&amp;query=Watanabe%2C+T">Takayuki Watanabe</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2401.01393v2-abstract-short" style="display: inline;"> A new variant of Newton&#39;s method - named Backtracking New Q-Newton&#39;s method (BNQN) - which has strong theoretical guarantee, is easy to implement, and has good experimental performance, was recently introduced by the third author. Experiments performed previously showed some remarkable properties of the basins of attractions for finding roots of polynomials and meromorphic functions, with BNQN.&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2401.01393v2-abstract-full').style.display = 'inline'; document.getElementById('2401.01393v2-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2401.01393v2-abstract-full" style="display: none;"> A new variant of Newton&#39;s method - named Backtracking New Q-Newton&#39;s method (BNQN) - which has strong theoretical guarantee, is easy to implement, and has good experimental performance, was recently introduced by the third author. Experiments performed previously showed some remarkable properties of the basins of attractions for finding roots of polynomials and meromorphic functions, with BNQN. In general, they look more smooth than that of Newton&#39;s method. In this paper, we continue to experimentally explore in depth this remarkable phenomenon, and connect BNQN to Newton&#39;s flow and Voronoi&#39;s diagram. This link poses a couple of challenging puzzles to be explained. Experiments also indicate that BNQN is more robust against random perturbations than Newton&#39;s method and Random Relaxed Newton&#39;s method. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2401.01393v2-abstract-full').style.display = 'none'; document.getElementById('2401.01393v2-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 8 January, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 2 January, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> January 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">48 pages. Some typos are fixed. Comments are welcome!. arXiv admin note: text overlap with arXiv:2312.12166</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2312.07273">arXiv:2312.07273</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2312.07273">pdf</a>, <a href="https://arxiv.org/format/2312.07273">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> Benchmarking Pretrained Vision Embeddings for Near- and Duplicate Detection in Medical Images </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Truong%2C+T">Tuan Truong</a>, <a href="/search/cs?searchtype=author&amp;query=Jush%2C+F+K">Farnaz Khun Jush</a>, <a href="/search/cs?searchtype=author&amp;query=Lenga%2C+M">Matthias Lenga</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2312.07273v2-abstract-short" style="display: inline;"> Near- and duplicate image detection is a critical concern in the field of medical imaging. Medical datasets often contain similar or duplicate images from various sources, which can lead to significant performance issues and evaluation biases, especially in machine learning tasks due to data leakage between training and testing subsets. In this paper, we present an approach for identifying near- a&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2312.07273v2-abstract-full').style.display = 'inline'; document.getElementById('2312.07273v2-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2312.07273v2-abstract-full" style="display: none;"> Near- and duplicate image detection is a critical concern in the field of medical imaging. Medical datasets often contain similar or duplicate images from various sources, which can lead to significant performance issues and evaluation biases, especially in machine learning tasks due to data leakage between training and testing subsets. In this paper, we present an approach for identifying near- and duplicate 3D medical images leveraging publicly available 2D computer vision embeddings. We assessed our approach by comparing embeddings extracted from two state-of-the-art self-supervised pretrained models and two different vector index structures for similarity retrieval. We generate an experimental benchmark based on the publicly available Medical Segmentation Decathlon dataset. The proposed method yields promising results for near- and duplicate image detection achieving a mean sensitivity and specificity of 0.9645 and 0.8559, respectively. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2312.07273v2-abstract-full').style.display = 'none'; document.getElementById('2312.07273v2-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 13 June, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 12 December, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> December 2023. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Accepted to International Symposium on Biomedical Imaging 2024, Athens, Greece</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2311.18620">arXiv:2311.18620</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2311.18620">pdf</a>, <a href="https://arxiv.org/format/2311.18620">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computational Engineering, Finance, and Science">cs.CE</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> </div> </div> <p class="title is-5 mathjax"> Data-driven prediction of tool wear using Bayesian-regularized artificial neural networks </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Truong%2C+T+T">Tam T. Truong</a>, <a href="/search/cs?searchtype=author&amp;query=Airao%2C+J">Jay Airao</a>, <a href="/search/cs?searchtype=author&amp;query=Karras%2C+P">Panagiotis Karras</a>, <a href="/search/cs?searchtype=author&amp;query=Hojati%2C+F">Faramarz Hojati</a>, <a href="/search/cs?searchtype=author&amp;query=Azarhoushang%2C+B">Bahman Azarhoushang</a>, <a href="/search/cs?searchtype=author&amp;query=Aghababaei%2C+R">Ramin Aghababaei</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2311.18620v1-abstract-short" style="display: inline;"> The prediction of tool wear helps minimize costs and enhance product quality in manufacturing. While existing data-driven models using machine learning and deep learning have contributed to the accurate prediction of tool wear, they often lack generality and require substantial training data for high accuracy. In this paper, we propose a new data-driven model that uses Bayesian Regularized Artific&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2311.18620v1-abstract-full').style.display = 'inline'; document.getElementById('2311.18620v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2311.18620v1-abstract-full" style="display: none;"> The prediction of tool wear helps minimize costs and enhance product quality in manufacturing. While existing data-driven models using machine learning and deep learning have contributed to the accurate prediction of tool wear, they often lack generality and require substantial training data for high accuracy. In this paper, we propose a new data-driven model that uses Bayesian Regularized Artificial Neural Networks (BRANNs) to precisely predict milling tool wear. BRANNs combine the strengths and leverage the benefits of artificial neural networks (ANNs) and Bayesian regularization, whereby ANNs learn complex patterns and Bayesian regularization handles uncertainty and prevents overfitting, resulting in a more generalized model. We treat both process parameters and monitoring sensor signals as BRANN input parameters. We conducted an extensive experimental study featuring four different experimental data sets, including the NASA Ames milling dataset, the 2010 PHM Data Challenge dataset, the NUAA Ideahouse tool wear dataset, and an in-house performed end-milling of the Ti6Al4V dataset. We inspect the impact of input features, training data size, hidden units, training algorithms, and transfer functions on the performance of the proposed BRANN model and demonstrate that it outperforms existing state-of-the-art models in terms of accuracy and reliability. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2311.18620v1-abstract-full').style.display = 'none'; document.getElementById('2311.18620v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 30 November, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> November 2023. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2311.15965">arXiv:2311.15965</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2311.15965">pdf</a>, <a href="https://arxiv.org/format/2311.15965">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> FALCON: Fairness Learning via Contrastive Attention Approach to Continual Semantic Scene Understanding </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Truong%2C+T">Thanh-Dat Truong</a>, <a href="/search/cs?searchtype=author&amp;query=Prabhu%2C+U">Utsav Prabhu</a>, <a href="/search/cs?searchtype=author&amp;query=Raj%2C+B">Bhiksha Raj</a>, <a href="/search/cs?searchtype=author&amp;query=Cothren%2C+J">Jackson Cothren</a>, <a href="/search/cs?searchtype=author&amp;query=Luu%2C+K">Khoa Luu</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2311.15965v2-abstract-short" style="display: inline;"> Continual Learning in semantic scene segmentation aims to continually learn new unseen classes in dynamic environments while maintaining previously learned knowledge. Prior studies focused on modeling the catastrophic forgetting and background shift challenges in continual learning. However, fairness, another major challenge that causes unfair predictions leading to low performance among major and&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2311.15965v2-abstract-full').style.display = 'inline'; document.getElementById('2311.15965v2-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2311.15965v2-abstract-full" style="display: none;"> Continual Learning in semantic scene segmentation aims to continually learn new unseen classes in dynamic environments while maintaining previously learned knowledge. Prior studies focused on modeling the catastrophic forgetting and background shift challenges in continual learning. However, fairness, another major challenge that causes unfair predictions leading to low performance among major and minor classes, still needs to be well addressed. In addition, prior methods have yet to model the unknown classes well, thus resulting in producing non-discriminative features among unknown classes. This paper presents a novel Fairness Learning via Contrastive Attention Approach to continual learning in semantic scene understanding. In particular, we first introduce a new Fairness Contrastive Clustering loss to address the problems of catastrophic forgetting and fairness. Then, we propose an attention-based visual grammar approach to effectively model the background shift problem and unknown classes, producing better feature representations for different unknown classes. Through our experiments, our proposed approach achieves State-of-the-Art (SOTA) performance on different continual learning benchmarks, i.e., ADE20K, Cityscapes, and Pascal VOC. It promotes the fairness of the continual semantic segmentation model. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2311.15965v2-abstract-full').style.display = 'none'; document.getElementById('2311.15965v2-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 9 May, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 27 November, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> November 2023. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2311.15206">arXiv:2311.15206</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2311.15206">pdf</a>, <a href="https://arxiv.org/format/2311.15206">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> Insect-Foundation: A Foundation Model and Large-scale 1M Dataset for Visual Insect Understanding </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Nguyen%2C+H">Hoang-Quan Nguyen</a>, <a href="/search/cs?searchtype=author&amp;query=Truong%2C+T">Thanh-Dat Truong</a>, <a href="/search/cs?searchtype=author&amp;query=Nguyen%2C+X+B">Xuan Bac Nguyen</a>, <a href="/search/cs?searchtype=author&amp;query=Dowling%2C+A">Ashley Dowling</a>, <a href="/search/cs?searchtype=author&amp;query=Li%2C+X">Xin Li</a>, <a href="/search/cs?searchtype=author&amp;query=Luu%2C+K">Khoa Luu</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2311.15206v2-abstract-short" style="display: inline;"> In precision agriculture, the detection and recognition of insects play an essential role in the ability of crops to grow healthy and produce a high-quality yield. The current machine vision model requires a large volume of data to achieve high performance. However, there are approximately 5.5 million different insect species in the world. None of the existing insect datasets can cover even a frac&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2311.15206v2-abstract-full').style.display = 'inline'; document.getElementById('2311.15206v2-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2311.15206v2-abstract-full" style="display: none;"> In precision agriculture, the detection and recognition of insects play an essential role in the ability of crops to grow healthy and produce a high-quality yield. The current machine vision model requires a large volume of data to achieve high performance. However, there are approximately 5.5 million different insect species in the world. None of the existing insect datasets can cover even a fraction of them due to varying geographic locations and acquisition costs. In this paper, we introduce a novel &#34;Insect-1M&#34; dataset, a game-changing resource poised to revolutionize insect-related foundation model training. Covering a vast spectrum of insect species, our dataset, including 1 million images with dense identification labels of taxonomy hierarchy and insect descriptions, offers a panoramic view of entomology, enabling foundation models to comprehend visual and semantic information about insects like never before. Then, to efficiently establish an Insect Foundation Model, we develop a micro-feature self-supervised learning method with a Patch-wise Relevant Attention mechanism capable of discerning the subtle differences among insect images. In addition, we introduce Description Consistency loss to improve micro-feature modeling via insect descriptions. Through our experiments, we illustrate the effectiveness of our proposed approach in insect modeling and achieve State-of-the-Art performance on standard benchmarks of insect-related tasks. Our Insect Foundation Model and Dataset promise to empower the next generation of insect-related vision models, bringing them closer to the ultimate goal of precision agriculture. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2311.15206v2-abstract-full').style.display = 'none'; document.getElementById('2311.15206v2-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 15 March, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 26 November, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> November 2023. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2311.13547">arXiv:2311.13547</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2311.13547">pdf</a>, <a href="https://arxiv.org/format/2311.13547">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> </div> </div> <p class="title is-5 mathjax"> Medical Image Retrieval Using Pretrained Embeddings </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Jush%2C+F+K">Farnaz Khun Jush</a>, <a href="/search/cs?searchtype=author&amp;query=Truong%2C+T">Tuan Truong</a>, <a href="/search/cs?searchtype=author&amp;query=Vogler%2C+S">Steffen Vogler</a>, <a href="/search/cs?searchtype=author&amp;query=Lenga%2C+M">Matthias Lenga</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2311.13547v1-abstract-short" style="display: inline;"> A wide range of imaging techniques and data formats available for medical images make accurate retrieval from image databases challenging. Efficient retrieval systems are crucial in advancing medical research, enabling large-scale studies and innovative diagnostic tools. Thus, addressing the challenges of medical image retrieval is essential for the continued enhancement of healthcare and resear&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2311.13547v1-abstract-full').style.display = 'inline'; document.getElementById('2311.13547v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2311.13547v1-abstract-full" style="display: none;"> A wide range of imaging techniques and data formats available for medical images make accurate retrieval from image databases challenging. Efficient retrieval systems are crucial in advancing medical research, enabling large-scale studies and innovative diagnostic tools. Thus, addressing the challenges of medical image retrieval is essential for the continued enhancement of healthcare and research. In this study, we evaluated the feasibility of employing four state-of-the-art pretrained models for medical image retrieval at modality, body region, and organ levels and compared the results of two similarity indexing approaches. Since the employed networks take 2D images, we analyzed the impacts of weighting and sampling strategies to incorporate 3D information during retrieval of 3D volumes. We showed that medical image retrieval is feasible using pretrained networks without any additional training or fine-tuning steps. Using pretrained embeddings, we achieved a recall of 1 for various tasks at modality, body region, and organ level. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2311.13547v1-abstract-full').style.display = 'none'; document.getElementById('2311.13547v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 22 November, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> November 2023. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">8 pages, 3 figures, 4 tables</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2309.17215">arXiv:2309.17215</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2309.17215">pdf</a>, <a href="https://arxiv.org/format/2309.17215">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> </div> </div> <p class="title is-5 mathjax"> RSAM: Learning on manifolds with Riemannian Sharpness-aware Minimization </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Truong%2C+T">Tuan Truong</a>, <a href="/search/cs?searchtype=author&amp;query=Nguyen%2C+H">Hoang-Phi Nguyen</a>, <a href="/search/cs?searchtype=author&amp;query=Pham%2C+T">Tung Pham</a>, <a href="/search/cs?searchtype=author&amp;query=Tran%2C+M">Minh-Tuan Tran</a>, <a href="/search/cs?searchtype=author&amp;query=Harandi%2C+M">Mehrtash Harandi</a>, <a href="/search/cs?searchtype=author&amp;query=Phung%2C+D">Dinh Phung</a>, <a href="/search/cs?searchtype=author&amp;query=Le%2C+T">Trung Le</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2309.17215v1-abstract-short" style="display: inline;"> Nowadays, understanding the geometry of the loss landscape shows promise in enhancing a model&#39;s generalization ability. In this work, we draw upon prior works that apply geometric principles to optimization and present a novel approach to improve robustness and generalization ability for constrained optimization problems. Indeed, this paper aims to generalize the Sharpness-Aware Minimization (SAM)&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2309.17215v1-abstract-full').style.display = 'inline'; document.getElementById('2309.17215v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2309.17215v1-abstract-full" style="display: none;"> Nowadays, understanding the geometry of the loss landscape shows promise in enhancing a model&#39;s generalization ability. In this work, we draw upon prior works that apply geometric principles to optimization and present a novel approach to improve robustness and generalization ability for constrained optimization problems. Indeed, this paper aims to generalize the Sharpness-Aware Minimization (SAM) optimizer to Riemannian manifolds. In doing so, we first extend the concept of sharpness and introduce a novel notion of sharpness on manifolds. To support this notion of sharpness, we present a theoretical analysis characterizing generalization capabilities with respect to manifold sharpness, which demonstrates a tighter bound on the generalization gap, a result not known before. Motivated by this analysis, we introduce our algorithm, Riemannian Sharpness-Aware Minimization (RSAM). To demonstrate RSAM&#39;s ability to enhance generalization ability, we evaluate and contrast our algorithm on a broad set of problems, such as image classification and contrastive learning across different datasets, including CIFAR100, CIFAR10, and FGVCAircraft. Our code is publicly available at \url{https://t.ly/RiemannianSAM}. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2309.17215v1-abstract-full').style.display = 'none'; document.getElementById('2309.17215v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 29 September, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> September 2023. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2309.11475">arXiv:2309.11475</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2309.11475">pdf</a>, <a href="https://arxiv.org/format/2309.11475">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Optimization and Control">math.OC</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Dynamical Systems">math.DS</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Numerical Analysis">math.NA</span> </div> </div> <p class="title is-5 mathjax"> Creating walls to avoid unwanted points in root finding and optimization </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Truong%2C+T+T">Tuyen Trung Truong</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2309.11475v3-abstract-short" style="display: inline;"> In root finding and optimization, there are many cases where there is a closed set $A$ one likes that the sequence constructed by one&#39;s favourite method will not converge to A (here, we do not assume extra properties on $A$ such as being convex or connected). For example, if one wants to find roots, and one chooses initial points in the basin of attraction for 1 root $z^*$ (a fact which one may no&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2309.11475v3-abstract-full').style.display = 'inline'; document.getElementById('2309.11475v3-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2309.11475v3-abstract-full" style="display: none;"> In root finding and optimization, there are many cases where there is a closed set $A$ one likes that the sequence constructed by one&#39;s favourite method will not converge to A (here, we do not assume extra properties on $A$ such as being convex or connected). For example, if one wants to find roots, and one chooses initial points in the basin of attraction for 1 root $z^*$ (a fact which one may not know before hand), then one will always end up in that root. In this case, one would like to have a mechanism to avoid this point $z^*$ in the next runs of one&#39;s algorithm. Assume that one already has a method IM for optimization (and root finding) for non-constrained optimization. We provide a simple modification IM1 of the method to treat the situation discussed in the previous paragraph. If the method IM has strong theoretical guarantees, then so is IM1. As applications, we prove two theoretical applications: one concerns finding roots of a meromorphic function in an open subset of a Riemann surface, and the other concerns finding local minima of a function in an open subset of a Euclidean space inside it the function has at most countably many critical points. Along the way, we compare with main existing relevant methods in the current literature. We provide several examples in various different settings to illustrate the usefulness of the new approach. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2309.11475v3-abstract-full').style.display = 'none'; document.getElementById('2309.11475v3-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 10 January, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 20 September, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> September 2023. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">24 pages.Improved abstract and exposition. Comments are welcome!</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2309.03506">arXiv:2309.03506</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2309.03506">pdf</a>, <a href="https://arxiv.org/format/2309.03506">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> </div> </div> <p class="title is-5 mathjax"> Towards Robust Natural-Looking Mammography Lesion Synthesis on Ipsilateral Dual-Views Breast Cancer Analysis </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Nguyen%2C+T">Thanh-Huy Nguyen</a>, <a href="/search/cs?searchtype=author&amp;query=Kha%2C+Q+H">Quang Hien Kha</a>, <a href="/search/cs?searchtype=author&amp;query=Truong%2C+T+N+T">Thai Ngoc Toan Truong</a>, <a href="/search/cs?searchtype=author&amp;query=Lam%2C+B+T">Ba Thinh Lam</a>, <a href="/search/cs?searchtype=author&amp;query=Ngo%2C+B+H">Ba Hung Ngo</a>, <a href="/search/cs?searchtype=author&amp;query=Dinh%2C+Q+V">Quang Vinh Dinh</a>, <a href="/search/cs?searchtype=author&amp;query=Le%2C+N+Q+K">Nguyen Quoc Khanh Le</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2309.03506v1-abstract-short" style="display: inline;"> In recent years, many mammographic image analysis methods have been introduced for improving cancer classification tasks. Two major issues of mammogram classification tasks are leveraging multi-view mammographic information and class-imbalance handling. In the first problem, many multi-view methods have been released for concatenating features of two or more views for the training and inference st&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2309.03506v1-abstract-full').style.display = 'inline'; document.getElementById('2309.03506v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2309.03506v1-abstract-full" style="display: none;"> In recent years, many mammographic image analysis methods have been introduced for improving cancer classification tasks. Two major issues of mammogram classification tasks are leveraging multi-view mammographic information and class-imbalance handling. In the first problem, many multi-view methods have been released for concatenating features of two or more views for the training and inference stage. Having said that, most multi-view existing methods are not explainable in the meaning of feature fusion, and treat many views equally for diagnosing. Our work aims to propose a simple but novel method for enhancing examined view (main view) by leveraging low-level feature information from the auxiliary view (ipsilateral view) before learning the high-level feature that contains the cancerous features. For the second issue, we also propose a simple but novel malignant mammogram synthesis framework for upsampling minor class samples. Our easy-to-implement and no-training framework has eliminated the current limitation of the CutMix algorithm which is unreliable synthesized images with random pasted patches, hard-contour problems, and domain shift problems. Our results on VinDr-Mammo and CMMD datasets show the effectiveness of our two new frameworks for both multi-view training and synthesizing mammographic images, outperforming the previous conventional methods in our experimental settings. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2309.03506v1-abstract-full').style.display = 'none'; document.getElementById('2309.03506v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 7 September, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> September 2023. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2309.02197">arXiv:2309.02197</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2309.02197">pdf</a>, <a href="https://arxiv.org/format/2309.02197">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> Delving into Ipsilateral Mammogram Assessment under Multi-View Network </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Truong%2C+T+N+T">Thai Ngoc Toan Truong</a>, <a href="/search/cs?searchtype=author&amp;query=Nguyen%2C+T">Thanh-Huy Nguyen</a>, <a href="/search/cs?searchtype=author&amp;query=Lam%2C+B+T">Ba Thinh Lam</a>, <a href="/search/cs?searchtype=author&amp;query=Nguyen%2C+V+M+D">Vu Minh Duy Nguyen</a>, <a href="/search/cs?searchtype=author&amp;query=Nguyen%2C+H+P">Hong Phuc Nguyen</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2309.02197v2-abstract-short" style="display: inline;"> In many recent years, multi-view mammogram analysis has been focused widely on AI-based cancer assessment. In this work, we aim to explore diverse fusion strategies (average and concatenate) and examine the model&#39;s learning behavior with varying individuals and fusion pathways, involving Coarse Layer and Fine Layer. The Ipsilateral Multi-View Network, comprising five fusion types (Pre, Early, Midd&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2309.02197v2-abstract-full').style.display = 'inline'; document.getElementById('2309.02197v2-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2309.02197v2-abstract-full" style="display: none;"> In many recent years, multi-view mammogram analysis has been focused widely on AI-based cancer assessment. In this work, we aim to explore diverse fusion strategies (average and concatenate) and examine the model&#39;s learning behavior with varying individuals and fusion pathways, involving Coarse Layer and Fine Layer. The Ipsilateral Multi-View Network, comprising five fusion types (Pre, Early, Middle, Last, and Post Fusion) in ResNet-18, is employed. Notably, the Middle Fusion emerges as the most balanced and effective approach, enhancing deep-learning models&#39; generalization performance by +2.06% (concatenate) and +5.29% (average) in VinDr-Mammo dataset and +2.03% (concatenate) and +3% (average) in CMMD dataset on macro F1-Score. The paper emphasizes the crucial role of layer assignment in multi-view network extraction with various strategies. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2309.02197v2-abstract-full').style.display = 'none'; document.getElementById('2309.02197v2-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 6 September, 2023; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 5 September, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> September 2023. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2306.11698">arXiv:2306.11698</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2306.11698">pdf</a>, <a href="https://arxiv.org/format/2306.11698">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Cryptography and Security">cs.CR</span> </div> </div> <p class="title is-5 mathjax"> DecodingTrust: A Comprehensive Assessment of Trustworthiness in GPT Models </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Wang%2C+B">Boxin Wang</a>, <a href="/search/cs?searchtype=author&amp;query=Chen%2C+W">Weixin Chen</a>, <a href="/search/cs?searchtype=author&amp;query=Pei%2C+H">Hengzhi Pei</a>, <a href="/search/cs?searchtype=author&amp;query=Xie%2C+C">Chulin Xie</a>, <a href="/search/cs?searchtype=author&amp;query=Kang%2C+M">Mintong Kang</a>, <a href="/search/cs?searchtype=author&amp;query=Zhang%2C+C">Chenhui Zhang</a>, <a href="/search/cs?searchtype=author&amp;query=Xu%2C+C">Chejian Xu</a>, <a href="/search/cs?searchtype=author&amp;query=Xiong%2C+Z">Zidi Xiong</a>, <a href="/search/cs?searchtype=author&amp;query=Dutta%2C+R">Ritik Dutta</a>, <a href="/search/cs?searchtype=author&amp;query=Schaeffer%2C+R">Rylan Schaeffer</a>, <a href="/search/cs?searchtype=author&amp;query=Truong%2C+S+T">Sang T. Truong</a>, <a href="/search/cs?searchtype=author&amp;query=Arora%2C+S">Simran Arora</a>, <a href="/search/cs?searchtype=author&amp;query=Mazeika%2C+M">Mantas Mazeika</a>, <a href="/search/cs?searchtype=author&amp;query=Hendrycks%2C+D">Dan Hendrycks</a>, <a href="/search/cs?searchtype=author&amp;query=Lin%2C+Z">Zinan Lin</a>, <a href="/search/cs?searchtype=author&amp;query=Cheng%2C+Y">Yu Cheng</a>, <a href="/search/cs?searchtype=author&amp;query=Koyejo%2C+S">Sanmi Koyejo</a>, <a href="/search/cs?searchtype=author&amp;query=Song%2C+D">Dawn Song</a>, <a href="/search/cs?searchtype=author&amp;query=Li%2C+B">Bo Li</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2306.11698v5-abstract-short" style="display: inline;"> Generative Pre-trained Transformer (GPT) models have exhibited exciting progress in their capabilities, capturing the interest of practitioners and the public alike. Yet, while the literature on the trustworthiness of GPT models remains limited, practitioners have proposed employing capable GPT models for sensitive applications such as healthcare and finance -- where mistakes can be costly. To thi&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2306.11698v5-abstract-full').style.display = 'inline'; document.getElementById('2306.11698v5-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2306.11698v5-abstract-full" style="display: none;"> Generative Pre-trained Transformer (GPT) models have exhibited exciting progress in their capabilities, capturing the interest of practitioners and the public alike. Yet, while the literature on the trustworthiness of GPT models remains limited, practitioners have proposed employing capable GPT models for sensitive applications such as healthcare and finance -- where mistakes can be costly. To this end, this work proposes a comprehensive trustworthiness evaluation for large language models with a focus on GPT-4 and GPT-3.5, considering diverse perspectives -- including toxicity, stereotype bias, adversarial robustness, out-of-distribution robustness, robustness on adversarial demonstrations, privacy, machine ethics, and fairness. Based on our evaluations, we discover previously unpublished vulnerabilities to trustworthiness threats. For instance, we find that GPT models can be easily misled to generate toxic and biased outputs and leak private information in both training data and conversation history. We also find that although GPT-4 is usually more trustworthy than GPT-3.5 on standard benchmarks, GPT-4 is more vulnerable given jailbreaking system or user prompts, potentially because GPT-4 follows (misleading) instructions more precisely. Our work illustrates a comprehensive trustworthiness evaluation of GPT models and sheds light on the trustworthiness gaps. Our benchmark is publicly available at https://decodingtrust.github.io/ ; our dataset can be previewed at https://huggingface.co/datasets/AI-Secure/DecodingTrust ; a concise version of this work is at https://openreview.net/pdf?id=kaHpo8OZw2 . <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2306.11698v5-abstract-full').style.display = 'none'; document.getElementById('2306.11698v5-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 26 February, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 20 June, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> June 2023. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">NeurIPS 2023 Outstanding Paper (Datasets and Benchmarks Track)</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2306.08189">arXiv:2306.08189</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2306.08189">pdf</a>, <a href="https://arxiv.org/format/2306.08189">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> </div> </div> <p class="title is-5 mathjax"> Language models are not naysayers: An analysis of language models on negation benchmarks </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Truong%2C+T+H">Thinh Hung Truong</a>, <a href="/search/cs?searchtype=author&amp;query=Baldwin%2C+T">Timothy Baldwin</a>, <a href="/search/cs?searchtype=author&amp;query=Verspoor%2C+K">Karin Verspoor</a>, <a href="/search/cs?searchtype=author&amp;query=Cohn%2C+T">Trevor Cohn</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2306.08189v1-abstract-short" style="display: inline;"> Negation has been shown to be a major bottleneck for masked language models, such as BERT. However, whether this finding still holds for larger-sized auto-regressive language models (``LLMs&#39;&#39;) has not been studied comprehensively. With the ever-increasing volume of research and applications of LLMs, we take a step back to evaluate the ability of current-generation LLMs to handle negation, a fundam&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2306.08189v1-abstract-full').style.display = 'inline'; document.getElementById('2306.08189v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2306.08189v1-abstract-full" style="display: none;"> Negation has been shown to be a major bottleneck for masked language models, such as BERT. However, whether this finding still holds for larger-sized auto-regressive language models (``LLMs&#39;&#39;) has not been studied comprehensively. With the ever-increasing volume of research and applications of LLMs, we take a step back to evaluate the ability of current-generation LLMs to handle negation, a fundamental linguistic phenomenon that is central to language understanding. We evaluate different LLMs -- including the open-source GPT-neo, GPT-3, and InstructGPT -- against a wide range of negation benchmarks. Through systematic experimentation with varying model sizes and prompts, we show that LLMs have several limitations including insensitivity to the presence of negation, an inability to capture the lexical semantics of negation, and a failure to reason under negation. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2306.08189v1-abstract-full').style.display = 'none'; document.getElementById('2306.08189v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 13 June, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> June 2023. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2306.06156">arXiv:2306.06156</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2306.06156">pdf</a>, <a href="https://arxiv.org/format/2306.06156">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Quantitative Methods">q-bio.QM</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> PoET: A generative model of protein families as sequences-of-sequences </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Truong%2C+T+F">Timothy F. Truong Jr</a>, <a href="/search/cs?searchtype=author&amp;query=Bepler%2C+T">Tristan Bepler</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2306.06156v3-abstract-short" style="display: inline;"> Generative protein language models are a natural way to design new proteins with desired functions. However, current models are either difficult to direct to produce a protein from a specific family of interest, or must be trained on a large multiple sequence alignment (MSA) from the specific family of interest, making them unable to benefit from transfer learning across families. To address this,&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2306.06156v3-abstract-full').style.display = 'inline'; document.getElementById('2306.06156v3-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2306.06156v3-abstract-full" style="display: none;"> Generative protein language models are a natural way to design new proteins with desired functions. However, current models are either difficult to direct to produce a protein from a specific family of interest, or must be trained on a large multiple sequence alignment (MSA) from the specific family of interest, making them unable to benefit from transfer learning across families. To address this, we propose $\textbf{P}$r$\textbf{o}$tein $\textbf{E}$volutionary $\textbf{T}$ransformer (PoET), an autoregressive generative model of whole protein families that learns to generate sets of related proteins as sequences-of-sequences across tens of millions of natural protein sequence clusters. PoET can be used as a retrieval-augmented language model to generate and score arbitrary modifications conditioned on any protein family of interest, and can extrapolate from short context lengths to generalize well even for small families. This is enabled by a unique Transformer layer; we model tokens sequentially within sequences while attending between sequences order invariantly, allowing PoET to scale to context lengths beyond those used during training. In extensive experiments on deep mutational scanning datasets, we show that PoET outperforms existing protein language models and evolutionary sequence models for variant function prediction across proteins of all MSA depths. We also demonstrate PoET&#39;s ability to controllably generate new protein sequences. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2306.06156v3-abstract-full').style.display = 'none'; document.getElementById('2306.06156v3-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 1 November, 2023; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 9 June, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> June 2023. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Journal ref:</span> Advances in Neural Information Processing Systems (Vol. 36), 2023 </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2306.05340">arXiv:2306.05340</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2306.05340">pdf</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Robotics">cs.RO</span> </div> </div> <p class="title is-5 mathjax"> Research Impact of Solar Panel Cleaning Robot on Photovoltaic Panel&#39;s Deflection </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Phan%2C+T+D">Trung Dat Phan</a>, <a href="/search/cs?searchtype=author&amp;query=Nguyen%2C+M+D">Minh Duc Nguyen</a>, <a href="/search/cs?searchtype=author&amp;query=Auffray%2C+M">Maxence Auffray</a>, <a href="/search/cs?searchtype=author&amp;query=Le%2C+N+T">Nhut Thang Le</a>, <a href="/search/cs?searchtype=author&amp;query=Truong%2C+C+T">Cong Toai Truong</a>, <a href="/search/cs?searchtype=author&amp;query=Duong%2C+V+T">Van Tu Duong</a>, <a href="/search/cs?searchtype=author&amp;query=Nguyen%2C+H+H">Huy Hung Nguyen</a>, <a href="/search/cs?searchtype=author&amp;query=Nguyen%2C+T+T">Tan Tien Nguyen</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2306.05340v2-abstract-short" style="display: inline;"> In the last few decades, solar panel cleaning robots (SPCR) have been widely used for sanitizing photovoltaic (PV) panels as an effective solution for ensuring PV efficiency. However, the dynamic load generated by the SPCR during operation might have a negative impact on PV panels. To reduce these effects, this paper presents the utilization of ANSYS software to simulate multiple scenarios involvi&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2306.05340v2-abstract-full').style.display = 'inline'; document.getElementById('2306.05340v2-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2306.05340v2-abstract-full" style="display: none;"> In the last few decades, solar panel cleaning robots (SPCR) have been widely used for sanitizing photovoltaic (PV) panels as an effective solution for ensuring PV efficiency. However, the dynamic load generated by the SPCR during operation might have a negative impact on PV panels. To reduce these effects, this paper presents the utilization of ANSYS software to simulate multiple scenarios involving the impact of SPCR on PV panels. The simulation scenarios provided in the paper are derived from the typical movements of SPCR observed during practical operations. The simulation results show the deformation process of PV panels, and a second-order polynomial is established to describe the deformed amplitude along the centerline of PV panels. This second-order polynomial contributes to the design process of a damper system for SPCR aiming to reduce the influence of SPCR on PV panels. Moreover, the experiments are conducted to examine the correlation between the results of the simulation and the experiment. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2306.05340v2-abstract-full').style.display = 'none'; document.getElementById('2306.05340v2-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 8 June, 2023; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 8 June, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> June 2023. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">8 pages, 8 figures, The 4th International Conference on Applied Convergence Engineering (ICACE 2023)</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2306.04739">arXiv:2306.04739</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2306.04739">pdf</a>, <a href="https://arxiv.org/format/2306.04739">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> Automatic retrieval of corresponding US views in longitudinal examinations </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Kerdegari%2C+H">Hamideh Kerdegari</a>, <a href="/search/cs?searchtype=author&amp;query=Phung1%2C+T+H+N">Tran Huy Nhat Phung1</a>, <a href="/search/cs?searchtype=author&amp;query=Nguyen%2C+V+H">Van Hao Nguyen</a>, <a href="/search/cs?searchtype=author&amp;query=Truong%2C+T+P+T">Thi Phuong Thao Truong</a>, <a href="/search/cs?searchtype=author&amp;query=Le%2C+N+M+T">Ngoc Minh Thu Le</a>, <a href="/search/cs?searchtype=author&amp;query=Le%2C+T+P">Thanh Phuong Le</a>, <a href="/search/cs?searchtype=author&amp;query=Le%2C+T+M+T">Thi Mai Thao Le</a>, <a href="/search/cs?searchtype=author&amp;query=Pisani%2C+L">Luigi Pisani</a>, <a href="/search/cs?searchtype=author&amp;query=Denehy%2C+L">Linda Denehy</a>, <a href="/search/cs?searchtype=author&amp;query=Consortium%2C+V">Vital Consortium</a>, <a href="/search/cs?searchtype=author&amp;query=Razavi%2C+R">Reza Razavi</a>, <a href="/search/cs?searchtype=author&amp;query=Thwaites%2C+L">Louise Thwaites</a>, <a href="/search/cs?searchtype=author&amp;query=Yacoub%2C+S">Sophie Yacoub</a>, <a href="/search/cs?searchtype=author&amp;query=King%2C+A+P">Andrew P. King</a>, <a href="/search/cs?searchtype=author&amp;query=Gomez%2C+A">Alberto Gomez</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2306.04739v1-abstract-short" style="display: inline;"> Skeletal muscle atrophy is a common occurrence in critically ill patients in the intensive care unit (ICU) who spend long periods in bed. Muscle mass must be recovered through physiotherapy before patient discharge and ultrasound imaging is frequently used to assess the recovery process by measuring the muscle size over time. However, these manual measurements are subject to large variability, par&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2306.04739v1-abstract-full').style.display = 'inline'; document.getElementById('2306.04739v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2306.04739v1-abstract-full" style="display: none;"> Skeletal muscle atrophy is a common occurrence in critically ill patients in the intensive care unit (ICU) who spend long periods in bed. Muscle mass must be recovered through physiotherapy before patient discharge and ultrasound imaging is frequently used to assess the recovery process by measuring the muscle size over time. However, these manual measurements are subject to large variability, particularly since the scans are typically acquired on different days and potentially by different operators. In this paper, we propose a self-supervised contrastive learning approach to automatically retrieve similar ultrasound muscle views at different scan times. Three different models were compared using data from 67 patients acquired in the ICU. Results indicate that our contrastive model outperformed a supervised baseline model in the task of view retrieval with an AUC of 73.52% and when combined with an automatic segmentation model achieved 5.7%+/-0.24% error in cross-sectional area. Furthermore, a user study survey confirmed the efficacy of our model for muscle view retrieval. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2306.04739v1-abstract-full').style.display = 'none'; document.getElementById('2306.04739v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 7 June, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> June 2023. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">10 pages, 6 figures</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2305.15700">arXiv:2305.15700</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2305.15700">pdf</a>, <a href="https://arxiv.org/format/2305.15700">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> Fairness Continual Learning Approach to Semantic Scene Understanding in Open-World Environments </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Truong%2C+T">Thanh-Dat Truong</a>, <a href="/search/cs?searchtype=author&amp;query=Nguyen%2C+H">Hoang-Quan Nguyen</a>, <a href="/search/cs?searchtype=author&amp;query=Raj%2C+B">Bhiksha Raj</a>, <a href="/search/cs?searchtype=author&amp;query=Luu%2C+K">Khoa Luu</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2305.15700v4-abstract-short" style="display: inline;"> Continual semantic segmentation aims to learn new classes while maintaining the information from the previous classes. Although prior studies have shown impressive progress in recent years, the fairness concern in the continual semantic segmentation needs to be better addressed. Meanwhile, fairness is one of the most vital factors in deploying the deep learning model, especially in human-related o&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2305.15700v4-abstract-full').style.display = 'inline'; document.getElementById('2305.15700v4-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2305.15700v4-abstract-full" style="display: none;"> Continual semantic segmentation aims to learn new classes while maintaining the information from the previous classes. Although prior studies have shown impressive progress in recent years, the fairness concern in the continual semantic segmentation needs to be better addressed. Meanwhile, fairness is one of the most vital factors in deploying the deep learning model, especially in human-related or safety applications. In this paper, we present a novel Fairness Continual Learning approach to the semantic segmentation problem. In particular, under the fairness objective, a new fairness continual learning framework is proposed based on class distributions. Then, a novel Prototypical Contrastive Clustering loss is proposed to address the significant challenges in continual learning, i.e., catastrophic forgetting and background shift. Our proposed loss has also been proven as a novel, generalized learning paradigm of knowledge distillation commonly used in continual learning. Moreover, the proposed Conditional Structural Consistency loss further regularized the structural constraint of the predicted segmentation. Our proposed approach has achieved State-of-the-Art performance on three standard scene understanding benchmarks, i.e., ADE20K, Cityscapes, and Pascal VOC, and promoted the fairness of the segmentation model. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2305.15700v4-abstract-full').style.display = 'none'; document.getElementById('2305.15700v4-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 1 October, 2023; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 25 May, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> May 2023. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Accepted to NeurIPS 2023</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2305.15699">arXiv:2305.15699</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2305.15699">pdf</a>, <a href="https://arxiv.org/format/2305.15699">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> Cross-view Action Recognition Understanding From Exocentric to Egocentric Perspective </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Truong%2C+T">Thanh-Dat Truong</a>, <a href="/search/cs?searchtype=author&amp;query=Luu%2C+K">Khoa Luu</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2305.15699v3-abstract-short" style="display: inline;"> Understanding action recognition in egocentric videos has emerged as a vital research topic with numerous practical applications. With the limitation in the scale of egocentric data collection, learning robust deep learning-based action recognition models remains difficult. Transferring knowledge learned from the large-scale exocentric data to the egocentric data is challenging due to the differen&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2305.15699v3-abstract-full').style.display = 'inline'; document.getElementById('2305.15699v3-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2305.15699v3-abstract-full" style="display: none;"> Understanding action recognition in egocentric videos has emerged as a vital research topic with numerous practical applications. With the limitation in the scale of egocentric data collection, learning robust deep learning-based action recognition models remains difficult. Transferring knowledge learned from the large-scale exocentric data to the egocentric data is challenging due to the difference in videos across views. Our work introduces a novel cross-view learning approach to action recognition (CVAR) that effectively transfers knowledge from the exocentric to the selfish view. First, we present a novel geometric-based constraint into the self-attention mechanism in Transformer based on analyzing the camera positions between two views. Then, we propose a new cross-view self-attention loss learned on unpaired cross-view data to enforce the self-attention mechanism learning to transfer knowledge across views. Finally, to further improve the performance of our cross-view learning approach, we present the metrics to measure the correlations in videos and attention maps effectively. Experimental results on standard egocentric action recognition benchmarks, i.e., Charades-Ego, EPIC-Kitchens-55, and EPIC-Kitchens-100, have shown our approach&#39;s effectiveness and state-of-the-art performance. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2305.15699v3-abstract-full').style.display = 'none'; document.getElementById('2305.15699v3-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 26 August, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 25 May, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> May 2023. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2305.13693">arXiv:2305.13693</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2305.13693">pdf</a>, <a href="https://arxiv.org/format/2305.13693">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> </div> </div> <p class="title is-5 mathjax"> Automated Metrics for Medical Multi-Document Summarization Disagree with Human Evaluations </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Wang%2C+L+L">Lucy Lu Wang</a>, <a href="/search/cs?searchtype=author&amp;query=Otmakhova%2C+Y">Yulia Otmakhova</a>, <a href="/search/cs?searchtype=author&amp;query=DeYoung%2C+J">Jay DeYoung</a>, <a href="/search/cs?searchtype=author&amp;query=Truong%2C+T+H">Thinh Hung Truong</a>, <a href="/search/cs?searchtype=author&amp;query=Kuehl%2C+B+E">Bailey E. Kuehl</a>, <a href="/search/cs?searchtype=author&amp;query=Bransom%2C+E">Erin Bransom</a>, <a href="/search/cs?searchtype=author&amp;query=Wallace%2C+B+C">Byron C. Wallace</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2305.13693v1-abstract-short" style="display: inline;"> Evaluating multi-document summarization (MDS) quality is difficult. This is especially true in the case of MDS for biomedical literature reviews, where models must synthesize contradicting evidence reported across different documents. Prior work has shown that rather than performing the task, models may exploit shortcuts that are difficult to detect using standard n-gram similarity metrics such as&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2305.13693v1-abstract-full').style.display = 'inline'; document.getElementById('2305.13693v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2305.13693v1-abstract-full" style="display: none;"> Evaluating multi-document summarization (MDS) quality is difficult. This is especially true in the case of MDS for biomedical literature reviews, where models must synthesize contradicting evidence reported across different documents. Prior work has shown that rather than performing the task, models may exploit shortcuts that are difficult to detect using standard n-gram similarity metrics such as ROUGE. Better automated evaluation metrics are needed, but few resources exist to assess metrics when they are proposed. Therefore, we introduce a dataset of human-assessed summary quality facets and pairwise preferences to encourage and support the development of better automated evaluation methods for literature review MDS. We take advantage of community submissions to the Multi-document Summarization for Literature Review (MSLR) shared task to compile a diverse and representative sample of generated summaries. We analyze how automated summarization evaluation metrics correlate with lexical features of generated summaries, to other automated metrics including several we propose in this work, and to aspects of human-assessed summary quality. We find that not only do automated metrics fail to capture aspects of quality as assessed by humans, in many cases the system rankings produced by these metrics are anti-correlated with rankings according to human annotators. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2305.13693v1-abstract-full').style.display = 'none'; document.getElementById('2305.13693v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 23 May, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> May 2023. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">ACL 2023; Github: https://github.com/allenai/mslr-annotated-dataset</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2305.12121">arXiv:2305.12121</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2305.12121">pdf</a>, <a href="https://arxiv.org/format/2305.12121">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Sound">cs.SD</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Audio and Speech Processing">eess.AS</span> </div> </div> <p class="title is-5 mathjax"> ACA-Net: Towards Lightweight Speaker Verification using Asymmetric Cross Attention </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Yip%2C+J+Q">Jia Qi Yip</a>, <a href="/search/cs?searchtype=author&amp;query=Truong%2C+T">Tuan Truong</a>, <a href="/search/cs?searchtype=author&amp;query=Ng%2C+D">Dianwen Ng</a>, <a href="/search/cs?searchtype=author&amp;query=Zhang%2C+C">Chong Zhang</a>, <a href="/search/cs?searchtype=author&amp;query=Ma%2C+Y">Yukun Ma</a>, <a href="/search/cs?searchtype=author&amp;query=Nguyen%2C+T+H">Trung Hieu Nguyen</a>, <a href="/search/cs?searchtype=author&amp;query=Ni%2C+C">Chongjia Ni</a>, <a href="/search/cs?searchtype=author&amp;query=Zhao%2C+S">Shengkui Zhao</a>, <a href="/search/cs?searchtype=author&amp;query=Chng%2C+E+S">Eng Siong Chng</a>, <a href="/search/cs?searchtype=author&amp;query=Ma%2C+B">Bin Ma</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2305.12121v1-abstract-short" style="display: inline;"> In this paper, we propose ACA-Net, a lightweight, global context-aware speaker embedding extractor for Speaker Verification (SV) that improves upon existing work by using Asymmetric Cross Attention (ACA) to replace temporal pooling. ACA is able to distill large, variable-length sequences into small, fixed-sized latents by attending a small query to large key and value matrices. In ACA-Net, we buil&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2305.12121v1-abstract-full').style.display = 'inline'; document.getElementById('2305.12121v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2305.12121v1-abstract-full" style="display: none;"> In this paper, we propose ACA-Net, a lightweight, global context-aware speaker embedding extractor for Speaker Verification (SV) that improves upon existing work by using Asymmetric Cross Attention (ACA) to replace temporal pooling. ACA is able to distill large, variable-length sequences into small, fixed-sized latents by attending a small query to large key and value matrices. In ACA-Net, we build a Multi-Layer Aggregation (MLA) block using ACA to generate fixed-sized identity vectors from variable-length inputs. Through global attention, ACA-Net acts as an efficient global feature extractor that adapts to temporal variability unlike existing SV models that apply a fixed function for pooling over the temporal dimension which may obscure information about the signal&#39;s non-stationary temporal variability. Our experiments on the WSJ0-1talker show ACA-Net outperforms a strong baseline by 5\% relative improvement in EER using only 1/5 of the parameters. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2305.12121v1-abstract-full').style.display = 'none'; document.getElementById('2305.12121v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 20 May, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> May 2023. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Accepted to INTERSPEECH 2023</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2304.07372">arXiv:2304.07372</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2304.07372">pdf</a>, <a href="https://arxiv.org/format/2304.07372">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> CoMaL: Conditional Maximum Likelihood Approach to Self-supervised Domain Adaptation in Long-tail Semantic Segmentation </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Truong%2C+T">Thanh-Dat Truong</a>, <a href="/search/cs?searchtype=author&amp;query=Duong%2C+C+N">Chi Nhan Duong</a>, <a href="/search/cs?searchtype=author&amp;query=Helton%2C+P">Pierce Helton</a>, <a href="/search/cs?searchtype=author&amp;query=Dowling%2C+A">Ashley Dowling</a>, <a href="/search/cs?searchtype=author&amp;query=Li%2C+X">Xin Li</a>, <a href="/search/cs?searchtype=author&amp;query=Luu%2C+K">Khoa Luu</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2304.07372v1-abstract-short" style="display: inline;"> The research in self-supervised domain adaptation in semantic segmentation has recently received considerable attention. Although GAN-based methods have become one of the most popular approaches to domain adaptation, they have suffered from some limitations. They are insufficient to model both global and local structures of a given image, especially in small regions of tail classes. Moreover, they&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2304.07372v1-abstract-full').style.display = 'inline'; document.getElementById('2304.07372v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2304.07372v1-abstract-full" style="display: none;"> The research in self-supervised domain adaptation in semantic segmentation has recently received considerable attention. Although GAN-based methods have become one of the most popular approaches to domain adaptation, they have suffered from some limitations. They are insufficient to model both global and local structures of a given image, especially in small regions of tail classes. Moreover, they perform bad on the tail classes containing limited number of pixels or less training samples. In order to address these issues, we present a new self-supervised domain adaptation approach to tackle long-tail semantic segmentation in this paper. Firstly, a new metric is introduced to formulate long-tail domain adaptation in the segmentation problem. Secondly, a new Conditional Maximum Likelihood (CoMaL) approach in an autoregressive framework is presented to solve the problem of long-tail domain adaptation. Although other segmentation methods work under the pixel independence assumption, the long-tailed pixel distributions in CoMaL are generally solved in the context of structural dependency, as that is more realistic. Finally, the proposed method is evaluated on popular large-scale semantic segmentation benchmarks, i.e., &#34;SYNTHIA to Cityscapes&#34; and &#34;GTA to Cityscapes&#34;, and outperforms the prior methods by a large margin in both the standard and the proposed evaluation protocols. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2304.07372v1-abstract-full').style.display = 'none'; document.getElementById('2304.07372v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 14 April, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> April 2023. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2304.07199">arXiv:2304.07199</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2304.07199">pdf</a>, <a href="https://arxiv.org/format/2304.07199">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> CROVIA: Seeing Drone Scenes from Car Perspective via Cross-View Adaptation </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Truong%2C+T">Thanh-Dat Truong</a>, <a href="/search/cs?searchtype=author&amp;query=Duong%2C+C+N">Chi Nhan Duong</a>, <a href="/search/cs?searchtype=author&amp;query=Dowling%2C+A">Ashley Dowling</a>, <a href="/search/cs?searchtype=author&amp;query=Phung%2C+S+L">Son Lam Phung</a>, <a href="/search/cs?searchtype=author&amp;query=Cothren%2C+J">Jackson Cothren</a>, <a href="/search/cs?searchtype=author&amp;query=Luu%2C+K">Khoa Luu</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2304.07199v1-abstract-short" style="display: inline;"> Understanding semantic scene segmentation of urban scenes captured from the Unmanned Aerial Vehicles (UAV) perspective plays a vital role in building a perception model for UAV. With the limitations of large-scale densely labeled data, semantic scene segmentation for UAV views requires a broad understanding of an object from both its top and side views. Adapting from well-annotated autonomous driv&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2304.07199v1-abstract-full').style.display = 'inline'; document.getElementById('2304.07199v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2304.07199v1-abstract-full" style="display: none;"> Understanding semantic scene segmentation of urban scenes captured from the Unmanned Aerial Vehicles (UAV) perspective plays a vital role in building a perception model for UAV. With the limitations of large-scale densely labeled data, semantic scene segmentation for UAV views requires a broad understanding of an object from both its top and side views. Adapting from well-annotated autonomous driving data to unlabeled UAV data is challenging due to the cross-view differences between the two data types. Our work proposes a novel Cross-View Adaptation (CROVIA) approach to effectively adapt the knowledge learned from on-road vehicle views to UAV views. First, a novel geometry-based constraint to cross-view adaptation is introduced based on the geometry correlation between views. Second, cross-view correlations from image space are effectively transferred to segmentation space without any requirement of paired on-road and UAV view data via a new Geometry-Constraint Cross-View (GeiCo) loss. Third, the multi-modal bijective networks are introduced to enforce the global structural modeling across views. Experimental results on new cross-view adaptation benchmarks introduced in this work, i.e., SYNTHIA to UAVID and GTA5 to UAVID, show the State-of-the-Art (SOTA) performance of our approach over prior adaptation methods <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2304.07199v1-abstract-full').style.display = 'none'; document.getElementById('2304.07199v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 14 April, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> April 2023. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2304.02135">arXiv:2304.02135</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2304.02135">pdf</a>, <a href="https://arxiv.org/format/2304.02135">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> FREDOM: Fairness Domain Adaptation Approach to Semantic Scene Understanding </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Truong%2C+T">Thanh-Dat Truong</a>, <a href="/search/cs?searchtype=author&amp;query=Le%2C+N">Ngan Le</a>, <a href="/search/cs?searchtype=author&amp;query=Raj%2C+B">Bhiksha Raj</a>, <a href="/search/cs?searchtype=author&amp;query=Cothren%2C+J">Jackson Cothren</a>, <a href="/search/cs?searchtype=author&amp;query=Luu%2C+K">Khoa Luu</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2304.02135v1-abstract-short" style="display: inline;"> Although Domain Adaptation in Semantic Scene Segmentation has shown impressive improvement in recent years, the fairness concerns in the domain adaptation have yet to be well defined and addressed. In addition, fairness is one of the most critical aspects when deploying the segmentation models into human-related real-world applications, e.g., autonomous driving, as any unfair predictions could inf&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2304.02135v1-abstract-full').style.display = 'inline'; document.getElementById('2304.02135v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2304.02135v1-abstract-full" style="display: none;"> Although Domain Adaptation in Semantic Scene Segmentation has shown impressive improvement in recent years, the fairness concerns in the domain adaptation have yet to be well defined and addressed. In addition, fairness is one of the most critical aspects when deploying the segmentation models into human-related real-world applications, e.g., autonomous driving, as any unfair predictions could influence human safety. In this paper, we propose a novel Fairness Domain Adaptation (FREDOM) approach to semantic scene segmentation. In particular, from the proposed formulated fairness objective, a new adaptation framework will be introduced based on the fair treatment of class distributions. Moreover, to generally model the context of structural dependency, a new conditional structural constraint is introduced to impose the consistency of predicted segmentation. Thanks to the proposed Conditional Structure Network, the self-attention mechanism has sufficiently modeled the structural information of segmentation. Through the ablation studies, the proposed method has shown the performance improvement of the segmentation models and promoted fairness in the model predictions. The experimental results on the two standard benchmarks, i.e., SYNTHIA $\to$ Cityscapes and GTA5 $\to$ Cityscapes, have shown that our method achieved State-of-the-Art (SOTA) performance. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2304.02135v1-abstract-full').style.display = 'none'; document.getElementById('2304.02135v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 4 April, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> April 2023. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Accepted to CVPR&#39;23</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2303.09782">arXiv:2303.09782</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2303.09782">pdf</a>, <a href="https://arxiv.org/format/2303.09782">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> High Accurate and Explainable Multi-Pill Detection Framework with Graph Neural Network-Assisted Multimodal Data Fusion </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Nguyen%2C+A+D">Anh Duy Nguyen</a>, <a href="/search/cs?searchtype=author&amp;query=Pham%2C+H+H">Huy Hieu Pham</a>, <a href="/search/cs?searchtype=author&amp;query=Trung%2C+H+T">Huynh Thanh Trung</a>, <a href="/search/cs?searchtype=author&amp;query=Nguyen%2C+Q+V+H">Quoc Viet Hung Nguyen</a>, <a href="/search/cs?searchtype=author&amp;query=Truong%2C+T+N">Thao Nguyen Truong</a>, <a href="/search/cs?searchtype=author&amp;query=Nguyen%2C+P+L">Phi Le Nguyen</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2303.09782v1-abstract-short" style="display: inline;"> Due to the significant resemblance in visual appearance, pill misuse is prevalent and has become a critical issue, responsible for one-third of all deaths worldwide. Pill identification, thus, is a crucial concern needed to be investigated thoroughly. Recently, several attempts have been made to exploit deep learning to tackle the pill identification problem. However, most published works consider&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2303.09782v1-abstract-full').style.display = 'inline'; document.getElementById('2303.09782v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2303.09782v1-abstract-full" style="display: none;"> Due to the significant resemblance in visual appearance, pill misuse is prevalent and has become a critical issue, responsible for one-third of all deaths worldwide. Pill identification, thus, is a crucial concern needed to be investigated thoroughly. Recently, several attempts have been made to exploit deep learning to tackle the pill identification problem. However, most published works consider only single-pill identification and fail to distinguish hard samples with identical appearances. Also, most existing pill image datasets only feature single pill images captured in carefully controlled environments under ideal lighting conditions and clean backgrounds. In this work, we are the first to tackle the multi-pill detection problem in real-world settings, aiming at localizing and identifying pills captured by users in a pill intake. Moreover, we also introduce a multi-pill image dataset taken in unconstrained conditions. To handle hard samples, we propose a novel method for constructing heterogeneous a priori graphs incorporating three forms of inter-pill relationships, including co-occurrence likelihood, relative size, and visual semantic correlation. We then offer a framework for integrating a priori with pills&#39; visual features to enhance detection accuracy. Our experimental results have proved the robustness, reliability, and explainability of the proposed framework. Experimentally, it outperforms all detection benchmarks in terms of all evaluation metrics. Specifically, our proposed framework improves COCO mAP metrics by 9.4% over Faster R-CNN and 12.0% compared to vanilla YOLOv5. Our study opens up new opportunities for protecting patients from medication errors using an AI-based pill identification solution. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2303.09782v1-abstract-full').style.display = 'none'; document.getElementById('2303.09782v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 17 March, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> March 2023. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Under review by Plos ONE journal</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2302.11427">arXiv:2302.11427</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2302.11427">pdf</a>, <a href="https://arxiv.org/format/2302.11427">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> Enhanced Face Authentication With Separate Loss Functions </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Duong%2C+A">Anh-Kiet Duong</a>, <a href="/search/cs?searchtype=author&amp;query=Nguyen%2C+H">Hoang-Lan Nguyen</a>, <a href="/search/cs?searchtype=author&amp;query=Truong%2C+T">Toan-Thinh Truong</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2302.11427v2-abstract-short" style="display: inline;"> The overall objective of the main project is to propose and develop a system of facial authentication in unlocking phones or applications in phones using facial recognition. The system will include four separate architectures: face detection, face recognition, face spoofing, and classification of closed eyes. In which, we consider the problem of face recognition to be the most important, determini&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2302.11427v2-abstract-full').style.display = 'inline'; document.getElementById('2302.11427v2-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2302.11427v2-abstract-full" style="display: none;"> The overall objective of the main project is to propose and develop a system of facial authentication in unlocking phones or applications in phones using facial recognition. The system will include four separate architectures: face detection, face recognition, face spoofing, and classification of closed eyes. In which, we consider the problem of face recognition to be the most important, determining the true identity of the person standing in front of the screen with absolute accuracy is what facial recognition systems need to achieve. Along with the development of the face recognition problem, the problem of the anti-fake face is also gradually becoming popular and equally important. Our goal is to propose and develop two loss functions: LMCot and Double Loss. Then apply them to the face authentication process. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2302.11427v2-abstract-full').style.display = 'none'; document.getElementById('2302.11427v2-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 20 March, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 22 February, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2023. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">in Vietnamese language</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2301.06287">arXiv:2301.06287</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2301.06287">pdf</a>, <a href="https://arxiv.org/format/2301.06287">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Social and Information Networks">cs.SI</span> </div> </div> <p class="title is-5 mathjax"> A Multi-Platform Collection of Social Media Posts about the 2022 U.S. Midterm Elections </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Aiyappa%2C+R">Rachith Aiyappa</a>, <a href="/search/cs?searchtype=author&amp;query=DeVerna%2C+M+R">Matthew R. DeVerna</a>, <a href="/search/cs?searchtype=author&amp;query=Pote%2C+M">Manita Pote</a>, <a href="/search/cs?searchtype=author&amp;query=Truong%2C+B+T">Bao Tran Truong</a>, <a href="/search/cs?searchtype=author&amp;query=Zhao%2C+W">Wanying Zhao</a>, <a href="/search/cs?searchtype=author&amp;query=Axelrod%2C+D">David Axelrod</a>, <a href="/search/cs?searchtype=author&amp;query=Pessianzadeh%2C+A">Aria Pessianzadeh</a>, <a href="/search/cs?searchtype=author&amp;query=Kachwala%2C+Z">Zoher Kachwala</a>, <a href="/search/cs?searchtype=author&amp;query=Kim%2C+M">Munjung Kim</a>, <a href="/search/cs?searchtype=author&amp;query=Seckin%2C+O+C">Ozgur Can Seckin</a>, <a href="/search/cs?searchtype=author&amp;query=Kim%2C+M">Minsuk Kim</a>, <a href="/search/cs?searchtype=author&amp;query=Gandhi%2C+S">Sunny Gandhi</a>, <a href="/search/cs?searchtype=author&amp;query=Manikonda%2C+A">Amrutha Manikonda</a>, <a href="/search/cs?searchtype=author&amp;query=Pierri%2C+F">Francesco Pierri</a>, <a href="/search/cs?searchtype=author&amp;query=Menczer%2C+F">Filippo Menczer</a>, <a href="/search/cs?searchtype=author&amp;query=Yang%2C+K">Kai-Cheng Yang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2301.06287v2-abstract-short" style="display: inline;"> Social media are utilized by millions of citizens to discuss important political issues. Politicians use these platforms to connect with the public and broadcast policy positions. Therefore, data from social media has enabled many studies of political discussion. While most analyses are limited to data from individual platforms, people are embedded in a larger information ecosystem spanning multip&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2301.06287v2-abstract-full').style.display = 'inline'; document.getElementById('2301.06287v2-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2301.06287v2-abstract-full" style="display: none;"> Social media are utilized by millions of citizens to discuss important political issues. Politicians use these platforms to connect with the public and broadcast policy positions. Therefore, data from social media has enabled many studies of political discussion. While most analyses are limited to data from individual platforms, people are embedded in a larger information ecosystem spanning multiple social networks. Here we describe and provide access to the Indiana University 2022 U.S. Midterms Multi-Platform Social Media Dataset (MEIU22), a collection of social media posts from Twitter, Facebook, Instagram, Reddit, and 4chan. MEIU22 links to posts about the midterm elections based on a comprehensive list of keywords and tracks the social media accounts of 1,011 candidates from October 1 to December 25, 2022. We also publish the source code of our pipeline to enable similar multi-platform research projects. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2301.06287v2-abstract-full').style.display = 'none'; document.getElementById('2301.06287v2-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 26 March, 2023; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 16 January, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> January 2023. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">8 pages, 3 figures, forthcoming in ICWSM23</span> </p> </li> </ol> <nav class="pagination is-small is-centered breathe-horizontal" role="navigation" aria-label="pagination"> <a href="" class="pagination-previous is-invisible">Previous </a> <a href="/search/?searchtype=author&amp;query=Truong%2C+T&amp;start=50" class="pagination-next" >Next </a> <ul class="pagination-list"> <li> <a href="/search/?searchtype=author&amp;query=Truong%2C+T&amp;start=0" class="pagination-link is-current" aria-label="Goto page 1">1 </a> </li> <li> <a href="/search/?searchtype=author&amp;query=Truong%2C+T&amp;start=50" class="pagination-link " aria-label="Page 2" aria-current="page">2 </a> </li> <li> <a href="/search/?searchtype=author&amp;query=Truong%2C+T&amp;start=100" class="pagination-link " aria-label="Page 3" aria-current="page">3 </a> </li> </ul> </nav> <div class="is-hidden-tablet"> <!-- feedback for mobile only --> <span class="help" style="display: inline-block;"><a href="https://github.com/arXiv/arxiv-search/releases">Search v0.5.6 released 2020-02-24</a>&nbsp;&nbsp;</span> </div> </div> </main> <footer> <div class="columns is-desktop" role="navigation" aria-label="Secondary"> <!-- MetaColumn 1 --> <div class="column"> <div class="columns"> <div class="column"> <ul class="nav-spaced"> <li><a href="https://info.arxiv.org/about">About</a></li> <li><a href="https://info.arxiv.org/help">Help</a></li> </ul> </div> <div class="column"> <ul class="nav-spaced"> <li> <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 512 512" class="icon filter-black" role="presentation"><title>contact arXiv</title><desc>Click here to contact arXiv</desc><path d="M502.3 190.8c3.9-3.1 9.7-.2 9.7 4.7V400c0 26.5-21.5 48-48 48H48c-26.5 0-48-21.5-48-48V195.6c0-5 5.7-7.8 9.7-4.7 22.4 17.4 52.1 39.5 154.1 113.6 21.1 15.4 56.7 47.8 92.2 47.6 35.7.3 72-32.8 92.3-47.6 102-74.1 131.6-96.3 154-113.7zM256 320c23.2.4 56.6-29.2 73.4-41.4 132.7-96.3 142.8-104.7 173.4-128.7 5.8-4.5 9.2-11.5 9.2-18.9v-19c0-26.5-21.5-48-48-48H48C21.5 64 0 85.5 0 112v19c0 7.4 3.4 14.3 9.2 18.9 30.6 23.9 40.7 32.4 173.4 128.7 16.8 12.2 50.2 41.8 73.4 41.4z"/></svg> <a href="https://info.arxiv.org/help/contact.html"> Contact</a> </li> <li> <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 512 512" class="icon filter-black" role="presentation"><title>subscribe to arXiv mailings</title><desc>Click here to subscribe</desc><path d="M476 3.2L12.5 270.6c-18.1 10.4-15.8 35.6 2.2 43.2L121 358.4l287.3-253.2c5.5-4.9 13.3 2.6 8.6 8.3L176 407v80.5c0 23.6 28.5 32.9 42.5 15.8L282 426l124.6 52.2c14.2 6 30.4-2.9 33-18.2l72-432C515 7.8 493.3-6.8 476 3.2z"/></svg> <a href="https://info.arxiv.org/help/subscribe"> Subscribe</a> </li> </ul> </div> </div> </div> <!-- end MetaColumn 1 --> <!-- MetaColumn 2 --> <div class="column"> <div class="columns"> <div class="column"> <ul class="nav-spaced"> <li><a href="https://info.arxiv.org/help/license/index.html">Copyright</a></li> <li><a href="https://info.arxiv.org/help/policies/privacy_policy.html">Privacy Policy</a></li> </ul> </div> <div class="column sorry-app-links"> <ul class="nav-spaced"> <li><a href="https://info.arxiv.org/help/web_accessibility.html">Web Accessibility Assistance</a></li> <li> <p class="help"> <a class="a11y-main-link" href="https://status.arxiv.org" target="_blank">arXiv Operational Status <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 256 512" class="icon filter-dark_grey" role="presentation"><path d="M224.3 273l-136 136c-9.4 9.4-24.6 9.4-33.9 0l-22.6-22.6c-9.4-9.4-9.4-24.6 0-33.9l96.4-96.4-96.4-96.4c-9.4-9.4-9.4-24.6 0-33.9L54.3 103c9.4-9.4 24.6-9.4 33.9 0l136 136c9.5 9.4 9.5 24.6.1 34z"/></svg></a><br> Get status notifications via <a class="is-link" href="https://subscribe.sorryapp.com/24846f03/email/new" target="_blank"><svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 512 512" class="icon filter-black" role="presentation"><path d="M502.3 190.8c3.9-3.1 9.7-.2 9.7 4.7V400c0 26.5-21.5 48-48 48H48c-26.5 0-48-21.5-48-48V195.6c0-5 5.7-7.8 9.7-4.7 22.4 17.4 52.1 39.5 154.1 113.6 21.1 15.4 56.7 47.8 92.2 47.6 35.7.3 72-32.8 92.3-47.6 102-74.1 131.6-96.3 154-113.7zM256 320c23.2.4 56.6-29.2 73.4-41.4 132.7-96.3 142.8-104.7 173.4-128.7 5.8-4.5 9.2-11.5 9.2-18.9v-19c0-26.5-21.5-48-48-48H48C21.5 64 0 85.5 0 112v19c0 7.4 3.4 14.3 9.2 18.9 30.6 23.9 40.7 32.4 173.4 128.7 16.8 12.2 50.2 41.8 73.4 41.4z"/></svg>email</a> or <a class="is-link" href="https://subscribe.sorryapp.com/24846f03/slack/new" target="_blank"><svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 448 512" class="icon filter-black" role="presentation"><path d="M94.12 315.1c0 25.9-21.16 47.06-47.06 47.06S0 341 0 315.1c0-25.9 21.16-47.06 47.06-47.06h47.06v47.06zm23.72 0c0-25.9 21.16-47.06 47.06-47.06s47.06 21.16 47.06 47.06v117.84c0 25.9-21.16 47.06-47.06 47.06s-47.06-21.16-47.06-47.06V315.1zm47.06-188.98c-25.9 0-47.06-21.16-47.06-47.06S139 32 164.9 32s47.06 21.16 47.06 47.06v47.06H164.9zm0 23.72c25.9 0 47.06 21.16 47.06 47.06s-21.16 47.06-47.06 47.06H47.06C21.16 243.96 0 222.8 0 196.9s21.16-47.06 47.06-47.06H164.9zm188.98 47.06c0-25.9 21.16-47.06 47.06-47.06 25.9 0 47.06 21.16 47.06 47.06s-21.16 47.06-47.06 47.06h-47.06V196.9zm-23.72 0c0 25.9-21.16 47.06-47.06 47.06-25.9 0-47.06-21.16-47.06-47.06V79.06c0-25.9 21.16-47.06 47.06-47.06 25.9 0 47.06 21.16 47.06 47.06V196.9zM283.1 385.88c25.9 0 47.06 21.16 47.06 47.06 0 25.9-21.16 47.06-47.06 47.06-25.9 0-47.06-21.16-47.06-47.06v-47.06h47.06zm0-23.72c-25.9 0-47.06-21.16-47.06-47.06 0-25.9 21.16-47.06 47.06-47.06h117.84c25.9 0 47.06 21.16 47.06 47.06 0 25.9-21.16 47.06-47.06 47.06H283.1z"/></svg>slack</a> </p> </li> </ul> </div> </div> </div> <!-- end MetaColumn 2 --> </div> </footer> <script src="https://static.arxiv.org/static/base/1.0.0a5/js/member_acknowledgement.js"></script> </body> </html>

Pages: 1 2 3 4 5 6 7 8 9 10